From c7ac5321c938edea1d1afc4eccb6a565a9f509d1 Mon Sep 17 00:00:00 2001 From: Erik de Castro Lopo Date: Tue, 22 Sep 2020 08:13:52 +1000 Subject: [PATCH] db-sync: Reject TxMetadata objects containing NUL characters TxMetadata is stored as JSON and that JSON is stored in a 'jsonb' column in PostgreSQL. However, there are limitations to that Postgres 'jsonb' data type. Specifically, it cannot contain Unicode NUL characters. This temporary fix simply drops TxMetadata JSON objects that would otherwise be rejected by Postgres. Hopefully a better solution will be dreamt up and implemented later. Temporary workaround fix for: https://github.com/input-output-hk/cardano-db-sync/issues/297 --- .../src/Cardano/DbSync/Era/Shelley/Insert.hs | 45 +++++++++++++++---- 1 file changed, 36 insertions(+), 9 deletions(-) diff --git a/cardano-db-sync/src/Cardano/DbSync/Era/Shelley/Insert.hs b/cardano-db-sync/src/Cardano/DbSync/Era/Shelley/Insert.hs index 9fa4ff6c6..ef7aef84b 100644 --- a/cardano-db-sync/src/Cardano/DbSync/Era/Shelley/Insert.hs +++ b/cardano-db-sync/src/Cardano/DbSync/Era/Shelley/Insert.hs @@ -9,11 +9,13 @@ module Cardano.DbSync.Era.Shelley.Insert ( insertShelleyBlock + , containsUnicodeNul + , safeDecodeUtf8 ) where import Cardano.Prelude -import Cardano.BM.Trace (Trace, logDebug, logError, logInfo) +import Cardano.BM.Trace (Trace, logDebug, logError, logInfo, logWarning) import Cardano.Db (DbWord64 (..)) @@ -38,9 +40,12 @@ import Cardano.DbSync.Util import Cardano.Slotting.Slot (EpochNo (..), EpochSize (..)) import qualified Data.Aeson as Aeson +import qualified Data.ByteString.Char8 as BS import qualified Data.ByteString.Lazy.Char8 as LBS import qualified Data.Map.Strict as Map +import qualified Data.Text as Text import qualified Data.Text.Encoding as Text +import qualified Data.Text.Encoding.Error as Text import Database.Persist.Sql (SqlBackend) @@ -481,17 +486,39 @@ insertTxMetadata :: (MonadBaseControl IO m, MonadIO m) => Trace IO Text -> DB.TxId -> 
Shelley.MetaData -> ExceptT DbSyncNodeError (ReaderT SqlBackend m) () -insertTxMetadata _tracer txId (Shelley.MetaData mdmap) = +insertTxMetadata tracer txId (Shelley.MetaData mdmap) = mapM_ insert $ Map.toList mdmap where insert :: (MonadBaseControl IO m, MonadIO m) => (Word64, Shelley.MetaDatum) -> ExceptT DbSyncNodeError (ReaderT SqlBackend m) () - insert (key, md) = - void . lift . DB.insertTxMetadata $ - DB.TxMetadata - { DB.txMetadataKey = DbWord64 key - , DB.txMetadataJson = Text.decodeUtf8 . LBS.toStrict $ Aeson.encode (jsonFromMetadataValue md) - , DB.txMetadataTxId = txId - } + insert (key, md) = do + let jsonbs = LBS.toStrict $ Aeson.encode (jsonFromMetadataValue md) + ejson <- liftIO $ safeDecodeUtf8 jsonbs + case ejson of + Left err -> + liftIO . logWarning tracer $ mconcat + [ "insertTxMetadata: Could not decode to UTF8: ", textShow err ] + Right json -> do + -- See https://github.com/input-output-hk/cardano-db-sync/issues/297 + if containsUnicodeNul json + then liftIO $ logWarning tracer "insertTxMetadata: dropped due to a Unicode NUL character." + else + void . lift . DB.insertTxMetadata $ + DB.TxMetadata + { DB.txMetadataKey = DbWord64 key + , DB.txMetadataJson = json + , DB.txMetadataTxId = txId + } + +safeDecodeUtf8 :: ByteString -> IO (Either Text.UnicodeException Text) +safeDecodeUtf8 bs + | BS.any isNullChar bs = pure $ Left (Text.DecodeError (BS.unpack bs) (Just 0)) + | otherwise = try $ evaluate (Text.decodeUtf8With Text.strictDecode bs) + where + isNullChar :: Char -> Bool + isNullChar ch = ord ch == 0 + +containsUnicodeNul :: Text -> Bool +containsUnicodeNul = Text.isInfixOf "\\u0000"