diff --git a/OgreMain/include/OgreHardwareVertexBuffer.h b/OgreMain/include/OgreHardwareVertexBuffer.h
index 18e4bae64db..3d2cbb387aa 100644
--- a/OgreMain/include/OgreHardwareVertexBuffer.h
+++ b/OgreMain/include/OgreHardwareVertexBuffer.h
@@ -118,22 +118,24 @@ namespace Ogre {
         VET_FLOAT4 = 3,
         /// alias to more specific colour type - use the current rendersystem's colour packing
         VET_COLOUR = 4,
-        VET_SHORT1 = 5,
+        VET_SHORT1 = 5,  ///< AVOID (see note below)
         VET_SHORT2 = 6,
-        VET_SHORT3 = 7,
+        VET_SHORT3 = 7,  ///< AVOID (see note below)
         VET_SHORT4 = 8,
         VET_UBYTE4 = 9,
         /// D3D style compact colour
         VET_COLOUR_ARGB = 10,
         /// GL style compact colour
         VET_COLOUR_ABGR = 11,
+
+        // the following are not universally supported on all hardware:
         VET_DOUBLE1 = 12,
         VET_DOUBLE2 = 13,
         VET_DOUBLE3 = 14,
         VET_DOUBLE4 = 15,
-        VET_USHORT1 = 16,
+        VET_USHORT1 = 16,  ///< AVOID (see note below)
         VET_USHORT2 = 17,
-        VET_USHORT3 = 18,
+        VET_USHORT3 = 18,  ///< AVOID (see note below)
         VET_USHORT4 = 19,      
         VET_INT1 = 20,
         VET_INT2 = 21,
@@ -142,7 +144,21 @@ namespace Ogre {
         VET_UINT1 = 24,
         VET_UINT2 = 25,
         VET_UINT3 = 26,
-        VET_UINT4 = 27
+        VET_UINT4 = 27,
+        VET_BYTE4 = 28,  // signed bytes
+        // normalized types (range is either 0 to 1 or -1 to 1)
+        VET_BYTE4_NORM = 29,   // signed normalized bytes
+        VET_UBYTE4_NORM = 30,  // unsigned normalized bytes
+        VET_SHORT2_NORM = 31,  // signed normalized shorts
+        VET_SHORT4_NORM = 32,
+        VET_USHORT2_NORM = 33, // unsigned normalized shorts
+        VET_USHORT4_NORM = 34
+
+        // Note that SHORT1, SHORT3, USHORT1 and USHORT3 should never be used
+        // because they aren't supported on any known hardware -- their size
+        // is not a multiple of 4 bytes.
+        // These values should be removed when possible.  Try to avoid breaking the mesh
+        // serializer though.
     };
 
     /** This class declares the usage of a single vertex buffer as a component
@@ -187,14 +203,17 @@ namespace Ogre {
         size_t getSize(void) const;
         /// Utility method for helping to calculate offsets
         static size_t getTypeSize(VertexElementType etype);
-        /// Utility method which returns the count of values in a given type
+        /// Utility method which returns the count of values in a given type (result for colors may be counter-intuitive)
         static unsigned short getTypeCount(VertexElementType etype);
-        /** Simple converter function which will turn a single-value type into a
-            multi-value type based on a parameter.
+        /** Simple converter function which will return a type large enough to hold 'count' values
+            of the same type as the values in 'baseType'.  The 'baseType' parameter should have the
+            smallest count available.  The return type may have the count rounded up to the next multiple
+            of 4 bytes.  Byte types will always return a 4-count type, while short types will return either
+            a 2-count or 4-count type.
         */
         static VertexElementType multiplyTypeCount(VertexElementType baseType, unsigned short count);
-        /** Simple converter function which will a type into it's single-value
-            equivalent - makes switches on type easier.
+        /** Simple converter function which will turn a type into its single-value (or lowest multiple-value)
+            equivalent - makes switches on type easier.  May give counter-intuitive results with bytes or shorts.
         */
         static VertexElementType getBaseType(VertexElementType multiType);
 
diff --git a/OgreMain/include/OgreMeshManager.h b/OgreMain/include/OgreMeshManager.h
index 3be4992030c..2ebfa48b559 100644
--- a/OgreMain/include/OgreMeshManager.h
+++ b/OgreMain/include/OgreMeshManager.h
@@ -34,6 +34,7 @@ THE SOFTWARE.
 #include "OgreSingleton.h"
 #include "OgreVector3.h"
 #include "OgreHardwareBuffer.h"
+#include "OgreHardwareVertexBuffer.h"
 #include "OgrePatchSurface.h"
 #include "OgreHeaderPrefix.h"
 
@@ -392,6 +393,26 @@ namespace Ogre {
         /// @copydoc Singleton::getSingleton()
         static MeshManager* getSingletonPtr(void);
 
+        /** Gets the base element type used for blend weights in vertex buffers.
+        @remarks
+        See the remarks below for setBlendWeightsBaseElementType().
+        */
+        VertexElementType getBlendWeightsBaseElementType() const;
+
+        /** Sets the base element type used for blend weights in vertex buffers.
+        @remarks
+        This takes effect when meshes are loaded.  Default is VET_FLOAT1.
+        Valid values are:
+        VET_UBYTE4:        8-bit blend weights.  Lowest memory cost but may have precision issues.  Shader must multiply incoming blend weights with 1/255.  No software skinning.
+        VET_UBYTE4_NORM:   8-bit blend weights.  Lowest memory cost but may have precision issues.  Requires SM2.0+ vertex shader.  No software skinning.
+        VET_USHORT2:       16-bit blend weights.  Shader must multiply incoming blend weights with 1/65535.  No software skinning.
+        VET_USHORT2_NORM:  16-bit blend weights.  Requires SM2.0+ vertex shader.  No software skinning.
+        VET_SHORT2:        15-bit blend weights.  Shader must multiply incoming blend weights with 1/32767.  No software skinning.
+        VET_SHORT2_NORM:   15-bit blend weights.  May work on platforms that do not support VET_USHORT2_NORM.  No software skinning.
+        VET_FLOAT1:        23-bit blend weights.  Highest memory cost.  Supports hardware and software skinning.
+        */
+        void setBlendWeightsBaseElementType( VertexElementType vet );
+
         /** Gets the factor by which the bounding box of an entity is padded.
             Default is 0.01
         */
@@ -469,6 +490,9 @@ namespace Ogre {
         /** Utility method for manual loading a curved illusion plane */
         void loadManualCurvedIllusionPlane(Mesh* pMesh, MeshBuildParams& params);
 
+        // element type for blend weights in vertex buffer; one of the types accepted by setBlendWeightsBaseElementType()
+        VertexElementType mBlendWeightsBaseElementType;
+
         bool mPrepAllMeshesForShadowVolumes;
     
         //the factor by which the bounding box of an entity is padded   
diff --git a/OgreMain/src/OgreHardwareVertexBuffer.cpp b/OgreMain/src/OgreHardwareVertexBuffer.cpp
index b5c69962b5b..25104cc96d1 100644
--- a/OgreMain/src/OgreHardwareVertexBuffer.cpp
+++ b/OgreMain/src/OgreHardwareVertexBuffer.cpp
@@ -149,39 +149,38 @@ namespace Ogre {
         case VET_DOUBLE4:
             return sizeof(double)*4;
         case VET_SHORT1:
-            return sizeof(short);
-        case VET_SHORT2:
-            return sizeof(short)*2;
-        case VET_SHORT3:
-            return sizeof(short)*3;
-        case VET_SHORT4:
-            return sizeof(short)*4;
         case VET_USHORT1:
-            return sizeof(unsigned short);
+            return sizeof( short );
+        case VET_SHORT2:
+        case VET_SHORT2_NORM:
         case VET_USHORT2:
-            return sizeof(unsigned short)*2;
+        case VET_USHORT2_NORM:
+            return sizeof( short ) * 2;
+        case VET_SHORT3:
         case VET_USHORT3:
-            return sizeof(unsigned short)*3;
+            return sizeof( short ) * 3;
+        case VET_SHORT4:
+        case VET_SHORT4_NORM:
         case VET_USHORT4:
-            return sizeof(unsigned short)*4;
+        case VET_USHORT4_NORM:
+            return sizeof( short ) * 4;
         case VET_INT1:
-            return sizeof(int);
-        case VET_INT2:
-            return sizeof(int)*2;
-        case VET_INT3:
-            return sizeof(int)*3;
-        case VET_INT4:
-            return sizeof(int)*4;
         case VET_UINT1:
-            return sizeof(unsigned int);
+            return sizeof( int );
+        case VET_INT2:
         case VET_UINT2:
-            return sizeof(unsigned int)*2;
+            return sizeof( int ) * 2;
+        case VET_INT3:
         case VET_UINT3:
-            return sizeof(unsigned int)*3;
+            return sizeof( int ) * 3;
+        case VET_INT4:
         case VET_UINT4:
-            return sizeof(unsigned int)*4;
+            return sizeof( int ) * 4;
+        case VET_BYTE4:
+        case VET_BYTE4_NORM:
         case VET_UBYTE4:
-            return sizeof(unsigned char)*4;
+        case VET_UBYTE4_NORM:
+            return sizeof(char)*4;
         }
         return 0;
     }
@@ -202,7 +201,9 @@ namespace Ogre {
             return 1;
         case VET_FLOAT2:
         case VET_SHORT2:
+        case VET_SHORT2_NORM:
         case VET_USHORT2:
+        case VET_USHORT2_NORM:
         case VET_UINT2:
         case VET_INT2:
         case VET_DOUBLE2:
@@ -216,11 +217,16 @@ namespace Ogre {
             return 3;
         case VET_FLOAT4:
         case VET_SHORT4:
+        case VET_SHORT4_NORM:
         case VET_USHORT4:
+        case VET_USHORT4_NORM:
         case VET_UINT4:
         case VET_INT4:
         case VET_DOUBLE4:
+        case VET_BYTE4:
         case VET_UBYTE4:
+        case VET_BYTE4_NORM:
+        case VET_UBYTE4_NORM:
             return 4;
         }
         OGRE_EXCEPT(Exception::ERR_INVALIDPARAMS, "Invalid type", 
@@ -230,38 +236,56 @@ namespace Ogre {
     VertexElementType VertexElement::multiplyTypeCount(VertexElementType baseType, 
         unsigned short count)
     {
+        if ( count < 1 || count > 4 )
+        {
+            OGRE_EXCEPT( Exception::ERR_INVALIDPARAMS, "Count out of range",
+                "VertexElement::multiplyTypeCount" );
+        }
         switch (baseType)
         {
         case VET_FLOAT1:
-            switch(count)
-            {
-            case 1:
-                return VET_FLOAT1;
-            case 2:
-                return VET_FLOAT2;
-            case 3:
-                return VET_FLOAT3;
-            case 4:
-                return VET_FLOAT4;
-            default:
-                break;
-            }
-            break;
+        case VET_DOUBLE1:
+        case VET_INT1:
+        case VET_UINT1:
+            // evil enumeration arithmetic
+            return static_cast<VertexElementType>( baseType + count - 1 );
+
         case VET_SHORT1:
-            switch(count)
+        case VET_SHORT2:
+            if ( count <= 2 )
             {
-            case 1:
-                return VET_SHORT1;
-            case 2:
                 return VET_SHORT2;
-            case 3:
-                return VET_SHORT3;
-            case 4:
-                return VET_SHORT4;
-            default:
-                break;
             }
-            break;
+            return VET_SHORT4;
+
+        case VET_USHORT1:
+        case VET_USHORT2:
+            if ( count <= 2 )
+            {
+                return VET_USHORT2;
+            }
+            return VET_USHORT4;
+
+        case VET_SHORT2_NORM:
+            if ( count <= 2 )
+            {
+                return VET_SHORT2_NORM;
+            }
+            return VET_SHORT4_NORM;
+
+        case VET_USHORT2_NORM:
+            if ( count <= 2 )
+            {
+                return VET_USHORT2_NORM;
+            }
+            return VET_USHORT4_NORM;
+
+        case VET_BYTE4:
+        case VET_BYTE4_NORM:
+        case VET_UBYTE4:
+        case VET_UBYTE4_NORM:
+            return baseType;
+
         default:
             break;
         }
@@ -359,8 +383,20 @@ namespace Ogre {
             case VET_USHORT3:
             case VET_USHORT4:
                 return VET_USHORT1;
+            case VET_SHORT2_NORM:
+            case VET_SHORT4_NORM:
+                return VET_SHORT2_NORM;
+            case VET_USHORT2_NORM:
+            case VET_USHORT4_NORM:
+                return VET_USHORT2_NORM;
+            case VET_BYTE4:
+                return VET_BYTE4;
+            case VET_BYTE4_NORM:
+                return VET_BYTE4_NORM;
             case VET_UBYTE4:
                 return VET_UBYTE4;
+            case VET_UBYTE4_NORM:
+                return VET_UBYTE4_NORM;
         };
         // To keep compiler happy
         return VET_FLOAT1;
diff --git a/OgreMain/src/OgreMesh.cpp b/OgreMain/src/OgreMesh.cpp
index 4a9304f5733..08d778e0475 100644
--- a/OgreMain/src/OgreMesh.cpp
+++ b/OgreMain/src/OgreMesh.cpp
@@ -787,7 +787,6 @@ namespace Ogre {
     {
         // Create or reuse blend weight / indexes buffer
         // Indices are always a UBYTE4 no matter how many weights per vertex
-        // Weights are more specific though since they are Reals
         VertexDeclaration* decl = targetVertexData->vertexDeclaration;
         VertexBufferBinding* bind = targetVertexData->vertexBufferBinding;
         unsigned short bindIndex;
@@ -813,10 +812,12 @@ namespace Ogre {
             // Get new binding
             bindIndex = bind->getNextIndex();
         }
-
+        // type of Weights is settable on the MeshManager.
+        VertexElementType weightsBaseType = MeshManager::getSingleton().getBlendWeightsBaseElementType();
+        VertexElementType weightsVertexElemType = VertexElement::multiplyTypeCount( weightsBaseType, numBlendWeightsPerVertex );
         HardwareVertexBufferSharedPtr vbuf =
             HardwareBufferManager::getSingleton().createVertexBuffer(
-                sizeof(unsigned char)*4 + sizeof(float)*numBlendWeightsPerVertex,
+            sizeof( unsigned char ) * 4 + VertexElement::getTypeSize( weightsVertexElemType ),
                 targetVertexData->vertexCount,
                 HardwareBuffer::HBU_STATIC_WRITE_ONLY,
                 true // use shadow buffer
@@ -840,9 +841,7 @@ namespace Ogre {
             const VertexElement& idxElem =
                 decl->insertElement(insertPoint, bindIndex, 0, VET_UBYTE4, VES_BLEND_INDICES);
             const VertexElement& wtElem =
-                decl->insertElement(insertPoint+1, bindIndex, sizeof(unsigned char)*4,
-                VertexElement::multiplyTypeCount(VET_FLOAT1, numBlendWeightsPerVertex),
-                VES_BLEND_WEIGHTS);
+                decl->insertElement(insertPoint+1, bindIndex, sizeof(unsigned char)*4, weightsVertexElemType, VES_BLEND_WEIGHTS);
             pIdxElem = &idxElem;
             pWeightElem = &wtElem;
         }
@@ -853,13 +852,30 @@ namespace Ogre {
             const VertexElement& idxElem =
                 decl->addElement(bindIndex, 0, VET_UBYTE4, VES_BLEND_INDICES);
             const VertexElement& wtElem =
-                decl->addElement(bindIndex, sizeof(unsigned char)*4,
-                VertexElement::multiplyTypeCount(VET_FLOAT1, numBlendWeightsPerVertex),
-                VES_BLEND_WEIGHTS);
+                decl->addElement(bindIndex, sizeof(unsigned char)*4, weightsVertexElemType, VES_BLEND_WEIGHTS );
             pIdxElem = &idxElem;
             pWeightElem = &wtElem;
         }
 
+        unsigned int maxIntWt = 0;
+        // keeping a switch out of the loop
+        switch ( weightsBaseType )
+        {
+            case VET_FLOAT1:
+                break;
+            case VET_UBYTE4:
+            case VET_UBYTE4_NORM:
+                maxIntWt = 0xff;
+                break;
+            case VET_USHORT2:
+            case VET_USHORT2_NORM:
+                maxIntWt = 0xffff;
+                break;
+            case VET_SHORT2:
+            case VET_SHORT2_NORM:
+                maxIntWt = 0x7fff;
+                break;
+        }
         // Assign data
         size_t v;
         VertexBoneAssignmentList::const_iterator i, iend;
@@ -868,31 +884,98 @@ namespace Ogre {
         unsigned char *pBase = static_cast<unsigned char*>(
             vbuf->lock(HardwareBuffer::HBL_DISCARD));
         // Iterate by vertex
-        float *pWeight;
-        unsigned char *pIndex;
         for (v = 0; v < targetVertexData->vertexCount; ++v)
         {
-            /// Convert to specific pointers
-            pWeightElem->baseVertexPointerToElement(pBase, &pWeight);
-            pIdxElem->baseVertexPointerToElement(pBase, &pIndex);
+            // collect the indices/weights in these arrays
+            unsigned char indices[ 4 ] = { 0, 0, 0, 0 };
+            float weights[ 4 ] = { 1.0f, 0.0f, 0.0f, 0.0f };
             for (unsigned short bone = 0; bone < numBlendWeightsPerVertex; ++bone)
             {
                 // Do we still have data for this vertex?
                 if (i != iend && i->second.vertexIndex == v)
                 {
-                    // If so, write weight
-                    *pWeight++ = i->second.weight;
-                    *pIndex++ = static_cast<unsigned char>(boneIndexToBlendIndexMap[i->second.boneIndex]);
+                    // If so, grab weight and index
+                    weights[ bone ] = i->second.weight;
+                    indices[ bone ] = static_cast<unsigned char>( boneIndexToBlendIndexMap[ i->second.boneIndex ] );
                     ++i;
                 }
+            }
+            // if weights are integers,
+            if ( weightsBaseType != VET_FLOAT1 )
+            {
+                // pack the float weights into shorts/bytes
+                unsigned int intWeights[ 4 ];
+                unsigned int sum = 0;
+                const unsigned int wtScale = maxIntWt;  // this value corresponds to a weight of 1.0
+                for ( int ii = 0; ii < 4; ++ii )
+                {
+                    unsigned int bw = static_cast<unsigned int>( weights[ ii ] * wtScale );
+                    intWeights[ ii ] = bw;
+                    sum += bw;
+                }
+                // if the sum doesn't add up due to roundoff error, we need to adjust the intWeights so that the sum is wtScale
+                if ( sum != maxIntWt )
+                {
+                    // find the largest weight (it isn't necessarily the first one...)
+                    int iMaxWeight = 0;
+                    unsigned int maxWeight = 0;
+                    for ( int ii = 0; ii < 4; ++ii )
+                    {
+                        unsigned int bw = intWeights[ ii ];
+                        if ( bw > maxWeight )
+                        {
+                            iMaxWeight = ii;
+                            maxWeight = bw;
+                        }
+                    }
+                    // Adjust the largest weight to make sure the sum is correct.
+                    // The idea is that changing the largest weight will have the smallest effect
+                    // on the ratio of weights.  This works best when there is one dominant weight,
+                    // and worst when 2 or more weights are similar in magnitude.
+                    // A better method could be used to reduce the quantization error, but this is
+                    // being done at run-time so it needs to be quick.
+                    intWeights[ iMaxWeight ] += maxIntWt - sum;
+                }
+
+                // now write the weights
+                if ( weightsBaseType == VET_UBYTE4 || weightsBaseType == VET_UBYTE4_NORM )
+                {
+                    // write out the weights as bytes
+                    unsigned char* pWeight;
+                    pWeightElem->baseVertexPointerToElement( pBase, &pWeight );
+                    // NOTE: always writes out 4 regardless of numBlendWeightsPerVertex
+                    for ( int ii = 0; ii < 4; ++ii )
+                    {
+                        *pWeight++ = static_cast<unsigned char>( intWeights[ ii ] );
+                    }
+                }
                 else
                 {
-                    // Ran out of assignments for this vertex, use weight 0 to indicate empty.
-                    // If no bones are defined (an error in itself) set bone 0 as the assigned bone. 
-                    *pWeight++ = (bone == 0) ? 1.0f : 0.0f;
-                    *pIndex++ = 0;
+                    // write out the weights as shorts; the element was rounded up to 2 or 4 shorts, so fill the padding too
+                    unsigned short* pWeight;
+                    pWeightElem->baseVertexPointerToElement( pBase, &pWeight );
+                    for ( int ii = 0, nShorts = VertexElement::getTypeCount( weightsVertexElemType ); ii < nShorts; ++ii )
+                    {
+                        *pWeight++ = static_cast<unsigned short>( intWeights[ ii ] ); // slots past the weight count hold 0
+                    }
                 }
             }
+            else
+            {
+                // write out the weights as floats
+                float* pWeight;
+                pWeightElem->baseVertexPointerToElement( pBase, &pWeight );
+                for ( int ii = 0; ii < numBlendWeightsPerVertex; ++ii )
+                {
+                    *pWeight++ = weights[ ii ];
+                }
+            }
+            unsigned char* pIndex;
+            pIdxElem->baseVertexPointerToElement( pBase, &pIndex );
+            for ( int ii = 0; ii < 4; ++ii )
+            {
+                *pIndex++ = indices[ ii ];
+            }
             pBase += vbuf->getVertexSize();
         }
 
diff --git a/OgreMain/src/OgreMeshManager.cpp b/OgreMain/src/OgreMeshManager.cpp
index b42a8ac6395..76d31d49849 100644
--- a/OgreMain/src/OgreMeshManager.cpp
+++ b/OgreMain/src/OgreMeshManager.cpp
@@ -54,6 +54,7 @@ namespace Ogre
     MeshManager::MeshManager():
     mBoundsPaddingFactor(0.01), mListener(0)
     {
+        mBlendWeightsBaseElementType = VET_FLOAT1;
         mPrepAllMeshesForShadowVolumes = false;
 
         mLoadOrder = 350.0f;
@@ -965,7 +966,34 @@ namespace Ogre
         return mPrepAllMeshesForShadowVolumes;
     }
     //-----------------------------------------------------------------------
-    Real MeshManager::getBoundsPaddingFactor(void)
+    VertexElementType MeshManager::getBlendWeightsBaseElementType() const
+    {
+        return mBlendWeightsBaseElementType; // VET_FLOAT1 (set in the constructor) until the setter stores another type
+    }
+    //-----------------------------------------------------------------------
+    void MeshManager::setBlendWeightsBaseElementType( VertexElementType vet )
+    {
+        // Only float weights, or byte / short weights (plain or normalised), are accepted;
+        // these are the types the weight-packing switch in OgreMesh.cpp handles.
+        const bool isSupported =
+            vet == VET_FLOAT1 ||
+            vet == VET_UBYTE4 || vet == VET_UBYTE4_NORM ||
+            vet == VET_USHORT2 || vet == VET_USHORT2_NORM ||
+            vet == VET_SHORT2 || vet == VET_SHORT2_NORM;
+        if ( isSupported )
+        {
+            mBlendWeightsBaseElementType = vet;
+        }
+        else
+        {
+            // Any other type is rejected and the stored setting is left untouched.
+            OGRE_EXCEPT( Exception::ERR_INVALIDPARAMS,
+                "Unsupported setting for setBlendWeightsBaseElementType",
+                "MeshManager::setBlendWeightsBaseElementType" );
+        }
+    }
+    //-----------------------------------------------------------------------
+    Real MeshManager::getBoundsPaddingFactor( void )
     {
         return mBoundsPaddingFactor;
     }