## ETL: bronze to silver

### IMPORT LIBRARIES


In [0]:
from pyspark.sql.functions import col, unbase64, cast, from_json, regexp_replace, to_timestamp, from_unixtime
from pyspark.sql.types import StructType, StructField, StringType, DoubleType, ArrayType, MapType, LongType, IntegerType, BooleanType, FloatType
from pyspark.sql.functions import col, from_json, regexp_replace

### DEFINE SCHEMA

In [0]:
# Schema handed to from_json when parsing the JSON payload of an item record.
# Every field is declared nullable, so a key missing from a record simply
# yields a null column value.
item_schema = (
    StructType()
    .add("main_category", StringType(), True)
    .add("title", StringType(), True)
    .add("average_rating", FloatType(), True)
    .add("rating_number", IntegerType(), True)
    .add("features", ArrayType(StringType()), True)
    .add("description", ArrayType(StringType()), True)
    # Declared as a string here, although the sample output shows
    # numeric-looking values (e.g. 24.39).
    .add("price", StringType(), True)
    # Each image/video entry is read as a string -> string map; the sample
    # output shows image maps with the keys "large" and "variant".
    .add("images", ArrayType(MapType(StringType(), StringType())), True)
    .add("videos", ArrayType(MapType(StringType(), StringType())), True)
    .add("store", StringType(), True)
    .add("categories", ArrayType(StringType()), True)
    # Free-form attribute map (sample keys: Publisher, Language, ISBN 10, ...).
    .add("details", MapType(StringType(), StringType()), True)
    .add("parent_asin", StringType(), True)
    .add("bought_together", ArrayType(StringType()), True)
)

In [0]:
# Schema describing the JSON payload of a review record.
# Every field is declared nullable.
review_schema = (
    StructType()
    .add("rating", FloatType(), True)
    .add("title", StringType(), True)
    .add("text", StringType(), True)
    .add("images", ArrayType(StringType()), True)
    .add("asin", StringType(), True)
    .add("parent_asin", StringType(), True)
    .add("user_id", StringType(), True)
    # Declared as a string in this schema.
    # NOTE(review): presumably converted to a real timestamp downstream - confirm.
    .add("timestamp", StringType(), True)
    .add("helpful_vote", IntegerType(), True)
    .add("verified_purchase", BooleanType(), True)
)

### Item: bronze to silver

**PROCESSING**

In [0]:
# Item ETL (bronze -> silver): read the raw item records from the bronze table,
# parse the `value` payload as JSON using item_schema, and expose the parsed
# fields as top-level columns.

print("Start processing...")

# Extract
df_items_bronze = spark.table("`bigdata-and-bi`.bronze.items_raw")

# Transform
df_items_transformed = (
    df_items_bronze
    # Cast the payload to a string so from_json can parse it.
    # NOTE(review): `value` is presumably a binary column (it renders as
    # base64 in the sample output) - confirm against the bronze table schema.
    .withColumn("json_string", col("value").cast("string"))
    .filter(col("json_string").isNotNull())
    .withColumn("parsed_json", from_json(col("json_string"), item_schema))
    # from_json does not raise on unparseable input; the PySpark docs state it
    # returns null instead. Drop those rows explicitly rather than letting them
    # reach the silver layer as all-null records.
    .filter(col("parsed_json").isNotNull())
    .select("parsed_json.*")
)

# Load (intentionally still disabled).
# The write targets df_items_transformed, the only transformed frame defined
# in this cell.
# df_items_transformed.write \
#     .mode("overwrite") \
#     .saveAsTable("`bigdata-and-bi`.silver.items_clean")

print("--- Display sample ---")
display(df_items_bronze.limit(5))
display(df_items_transformed.limit(5))

Start processing...
--- Display sample ---


key,value,topic,partition,offset,timestamp,timestampType,ingest_ts,ingest_date
MDY4MTM5MDNkZjU0MDlmMDc3M2RiMmM2NGMwZDM1MjU=,eyJtYWluX2NhdGVnb3J5IjogIkJvb2tzIiwgInRpdGxlIjogIk11cmRlciBPbiB0aGUgT3JpZW50IEV4cHJlc3MgKHRleHQgb25seSkgN3RoIChTZXZlbnRoKSBlZGl0aW9uIGJ5IEEuIENocmlzdGllIiwgImF2ZXJhZ2VfcmF0aW5nIjogNC41LCAicmF0aW5nX251bWJlciI6IDMwMTMxLCAiZmVhdHVyZXMiOiBbIkNsYXNzaWMgbXVyZGVyIG15c3RlcnkiXSwgImRlc2NyaXB0aW9uIjogW10sICJwcmljZSI6IDUxLjAsICJpbWFnZXMiOiBbXSwgInZpZGVvcyI6IFtdLCAic3RvcmUiOiAiQS4gQ2hyaXN0aWUgKEF1dGhvcikiLCAiY2F0ZWdvcmllcyI6IFtdLCAiZGV0YWlscyI6IHsiUHVibGlzaGVyIjogIkJhbnRhbTsgN3RoIGVkaXRpb24gKEphbnVhcnkgMSwgMTk4MykiLCAiSW1pdGF0aW9uIExlYXRoZXIiOiAiMjEyIHBhZ2VzIiwgIkl0ZW0gV2VpZ2h0IjogIjEuMDEgcG91bmRzIn0sICJwYXJlbnRfYXNpbiI6ICJCMDA0UllSTzY4In0=,book-metadata,3,226159,2025-11-02T16:57:22.225Z,0,2025-11-02T17:02:23.920Z,2025-11-02
MzViZTI0YTRiNTg2ODM1ZWJmZjNiM2E4M2I3ZjRiZDU=,eyJtYWluX2NhdGVnb3J5IjogIkJvb2tzIiwgInRpdGxlIjogIkNhbGlmb3JuaWEgQ2FsbGVkIFRoZW0gLSBBIFNhZ2EgT2YgR29sZGVuIERheXMgQW5kIFJvYXJpbmcgQ2FtcHMiLCAiYXZlcmFnZV9yYXRpbmciOiA1LjAsICJyYXRpbmdfbnVtYmVyIjogMiwgImZlYXR1cmVzIjogWyJNYW55IG9mIHRoZSBlYXJsaWVzdCBib29rcywgcGFydGljdWxhcmx5IHRob3NlIGRhdGluZyBiYWNrIHRvIHRoZSAxOTAwcyBhbmQgYmVmb3JlLCBhcmUgbm93IGV4dHJlbWVseSBzY2FyY2UgYW5kIGluY3JlYXNpbmdseSBleHBlbnNpdmUuIFdlIGFyZSByZXB1Ymxpc2hpbmcgdGhlc2UgY2xhc3NpYyB3b3JrcyBpbiBhZmZvcmRhYmxlLCBoaWdoIHF1YWxpdHksIG1vZGVybiBlZGl0aW9ucywgdXNpbmcgdGhlIG9yaWdpbmFsIHRleHQgYW5kIGFydHdvcmsuIl0sICJkZXNjcmlwdGlvbiI6IFtdLCAicHJpY2UiOiAxMi45NSwgImltYWdlcyI6IFt7ImxhcmdlIjogImh0dHBzOi8vbS5tZWRpYS1hbWF6b24uY29tL2ltYWdlcy9JLzQxcXRMRmNzZjhMLl9TWDMyMl9CTzEsMjA0LDIwMywyMDBfLmpwZyIsICJ2YXJpYW50IjogIk1BSU4ifV0sICJ2aWRlb3MiOiBbXSwgInN0b3JlIjogIlJvYmVydCBPJ0JyaWVuIChBdXRob3IpIiwgImNhdGVnb3JpZXMiOiBbIkJvb2tzIiwgIkxpdGVyYXR1cmUgJiBGaWN0aW9uIiwgIkxpdGVyYXJ5Il0sICJkZXRhaWxzIjogeyJQdWJsaXNoZXIiOiAiTydCcmllbiBQcmVzcyAoTWFyY2ggMTUsIDIwMDcpIiwgIkxhbmd1YWdlIjogIkVuZ2xpc2giLCAiUGFwZXJiYWNrIjogIjI3MiBwYWdlcyIsICJJU0JOIDEwIjogIjE0MDY3NTY2NzkiLCAiSVNCTiAxMyI6ICI5NzgtMTQwNjc1NjY3OCIsICJJdGVtIFdlaWdodCI6ICIxMi4zIG91bmNlcyIsICJEaW1lbnNpb25zIjogIjUuNSB4IDAuNjEgeCA4LjUgaW5jaGVzIn0sICJwYXJlbnRfYXNpbiI6ICIxNDA2NzU2Njc5In0=,book-metadata,3,226160,2025-11-02T16:57:22.225Z,0,2025-11-02T17:02:23.920Z,2025-11-02
NGEzNjg3MTI0ZWNlNDI3NWYwNWUzYWU3NzVmMzQwNTQ=,eyJtYWluX2NhdGVnb3J5IjogIkJvb2tzIiwgInRpdGxlIjogIkhpZ2ggU2VhcyBDb25mZWRlcmF0ZTogVGhlIExpZmUgYW5kIFRpbWVzIG9mIEpvaG4gTmV3bGFuZCBNYWZmaXR0IChTdHVkaWVzIGluIE1hcml0aW1lIEhpc3RvcnkpIiwgImF2ZXJhZ2VfcmF0aW5nIjogNC43LCAicmF0aW5nX251bWJlciI6IDQsICJmZWF0dXJlcyI6IFsiVGhlIENpdmlsIFdhciBhZHZlbnR1cmVzIG9mIGEgc3dhc2hidWNrbGluZyBzZWEgY2FwdGFpbiB0aGF0wqB3b24gdGhlIENsYXJlbmRvbiBBd2FyZC0tc2VlIFwiQWJvdXQgdGhlIEF1dGhvclwiIHVuZGVywqBcIlJlYWQgbW9yZVwiIGJlbG93IiwgIkNvbnRhaW5zIGFuIG9yaWdpbmFsIG1hcCBkZXNpZ25lZCBieSB0aGUgYXV0aG9yIHRoYXQgcGxvdHMgdGhlIGNydWlzZSBvZiB0aGUgQy5TLlMuIEZsb3JpZGEsIHRvZ2V0aGVyIHdpdGggYSBjb21wbGV0ZSBsaXN0IG9mIHRoZSB2ZXNzZWxzIGNhcHR1cmVkLMKgd2hpY2ggd2FzIHVzZWQgYnkgdGhlIFByZXNzIGFzIGZyb250IGFuZCBiYWNrIGVuZHBhcGVyczvCoGFsc28gY29udGFpbnMgYW4gb3JpZ2luYWzCoGNvbG9yIHNrZXRjaCBjb21taXNzaW9uZWQgYnkgdGhlIGF1dGhvciBvZiB0aGUgYmxvY2thZGUgcnVubmVyIE93bCIsICJBIFwid2lkZWx5IHJlY29nbml6ZWQgYW5kIGNvbXBldGVudCBmdWxsLWxlbmd0aFwiIGJpb2dyYXBoeS0tVGhlIEFtZXJpY2FuIENpdmlsIFdhcjrCoCBBIEhhbmRib29rIG9mIExpdGVyYXR1cmUgYW5kIFJlc2VhcmNoIl0sICJkZXNjcmlwdGlvbiI6IFsiRnJvbSBQdWJsaXNoZXJzIFdlZWtseSIsICJCZWZvcmUgdGhlIENpdmlsIFdhciwgTWFmZml0dCAoMTgxOS0xODg2KSB3YXMgcmVjb2duaXplZCBhcyBvbmUgb2YgdGhlIFUuUy4gTmF2eSdzIGZpbmVzdCBvZmZpY2Vycy4gQWZ0ZXIgam9pbmluZyB0aGUgQ29uZmVkZXJhY3kgaW4gMTg2MSwgTWFmZml0dCBvcmdhbml6ZWQgYmxvY2thZGUtcnVubmluZyBvcGVyYXRpb25zIGZyb20gdGhlIFdlc3QgSW5kaWVzLiBUaGUgaGlnaCBwb2ludCBvZiBoaXMgY2FyZWVyLCBob3dldmVyLCB3YXMgaGlzIGNhcHRhaW5jeSBvZiB0aGUgY29tbWVyY2UtcmFpZGVyIEMuUy5TIC4gRmxvcmlkYS4gSW4gb25seSB0d28gY3J1aXNlcywgTWFmZml0dCB0b29rIDU3IHByaXplcywgd3JlYWtpbmcgb3ZlciAkNCBtaWxsaW9uIHdvcnRoIG9mIGRhbWFnZSB0byBVbmlvbiBzaGlwcGluZy4gUmVhc3NpZ25lZCB0byBibG9ja2FkZS1ydW5uaW5nIGluIDE4NjMsIE1hZmZpdHQgY29udGludWVkIHRvIGJhZmZsZSBZYW5rZWUgY2FwdGFpbnMgYW5kIGJyaW5nIHZpdGFsIG1pbGl0YXJ5IGNhcmdvZXMgaW50byBTb3V0aGVybiBwb3J0cyB1bnRpbCBuZWFybHkgdGhlIGVuZCBvZiB0aGUgd2FyLiBTaGluZ2xldG9uICggSm9obiBUYXlsb3IgV29vZDogU2VhIEdob3N0IG9mIHRoZSBDb25mZWRlcmFjeSApIGhpZ2hsaWdodHMgdGhlIGx
lZ2FsIGFuZCBwb2xpdGljYWwgYXNwZWN0cyBvZiBNYWZmaXR0J3Mgb3BlcmF0aW9ucywgd2hpY2ggd2VyZSBzdGlsbCBpbXBlcmZlY3RseSByZWd1bGF0ZWQgYnkgaW50ZXJuYXRpb25hbCBsYXcuIE1hZmZpdCdzIGluZ2VudWl0eSwgY291cmFnZSwgc2VhbWFuc2hpcCBhbmQgaGlnaC1zcGlyaXRlZCBsZWFkZXJzaGlwIHJlbmRlcmVkIGhpbSBhbiBpZGVhbCBoZXJvIGZvciBhIGxvc3QgY2F1c2UuIFBob3RvcyBub3Qgc2VlbiBieSBQVy4gQ29weXJpZ2h0IDE5OTQgUmVlZCBCdXNpbmVzcyBJbmZvcm1hdGlvbiwgSW5jLiIsICJGcm9tIExpYnJhcnkgSm91cm5hbCIsICJNYWZmaXR0J3MgY29sb3JmdWwgY2FyZWVyIGFzIGEgQ29uZmVkZXJhdGUgY29tbWFuZGVyIG9mIHRoZSBjb21tZXJjZSByYWlkZXIgQy5TLlMuIEZsb3JpZGEgYW5kIGFzIGEgYmxvY2thZGUgcnVubmVyIHJlY2VpdmVzIGl0cyBkdWUgaW4gdGhpcyBiaW9ncmFwaGljYWwgcmVjb3VudGluZyBieSBTaGluZ2xldG9uIChSaWNoYXJkIFBldGVyczogQ2hhbXBpb24gb2YgdGhlIE5ldyBTb3V0aCwgTWVyY2VyIFVuaXYuIFByLiwgMTk4NSkuIE1hZmZpdHQncyBkYXJpbmcgYW5kIHNraWxsIGhlbHBlZCBpbiB0aGUgY2FwdHVyZSBvZiBzb21lIDQ1IFVuaW9uIHNoaXBzLCBpbmNsdWRpbmcgMjMgc2VpemVkIGJ5IHRoZSBGbG9yaWRhIGl0c2VsZi4gVGhlc2UgZXhwbG9pdHMgbWFya2VkIGhpbSBhcyBhIHRob3JuIGluIHRoZSBzaWRlIG9mIHRoZSBVbmlvbiwgYXMgZGlkIGhpcyBlZmZvcnRzIHRvIHJ1biB0aGUgYmxvY2thZGUuIFNoaW5nbGV0b24gY29udmV5cyB0aGUgZXhjaXRlbWVudCBvZiBibG9ja2FkZSBydW5uaW5nIGFuZCBjb21tZXJjZSByYWlkaW5nIGluIGhpcyBsaXZlbHkgbmFycmF0aXZlOyBoZSBpcyBsZXNzIGhlbHBmdWwgd2hlbiBpdCBjb21lcyB0byBldmFsdWF0aW5nIHRoZWlyIHNpZ25pZmljYW5jZSB0byB0aGUgQ29uZmVkZXJhdGUgd2FyIGVmZm9ydCwgZm9yIG1hbnkgaGlzdG9yaWFucyBxdWVzdGlvbiB3aGV0aGVyIHRoZXNlIGFjdGl2aXRpZXMgaGFkIG11Y2ggaW1wYWN0IHVwb24gdGhlIGNvbmR1Y3QsIGNvdXJzZSwgb3Igb3V0Y29tZSBvZiB0aGUgY29uZmxpY3QuIFRoZSBhdXRob3IncyBhZG1pcmF0aW9uIGFuZCBzeW1wYXRoeSBmb3IgYm90aCBNYWZmaXR0IGFuZCB0aGUgQ29uZmVkZXJhdGUgY2F1c2UgYXJlIGV2aWRlbnQgdGhyb3VnaG91dCB0aGUgbmFycmF0aXZlLCB0byB0aGUgcG9pbnQgdGhhdCBzb21lIG1heSBtaXN0YWtlIHRoaXMgYWNjb3VudCBmb3IgYSByb21hbnRpYyB0YWxlIG9mIHRoZSBMb3N0IENhdXNlLiBGb3IgaW5mb3JtZWQgcmVhZGVycy4iLCAiQnJvb2tzIEQuIFNpbXBzb24sIEFyaXpvbmEgU3RhdGUgVW5pdi4sIFRlbXBlIiwgIkNvcHlyaWdodCAxOTk0IFJlZWQgQnVzaW5lc3MgSW5mb3JtYXRpb24sIEluYy4iLCAiRnJvbSIsICJCb29rbGlzdCIsICJUaGF0IHRoaXMgYmlvZ3JhcGh5IG9mIENvbmZlZGVyYXRlIG5hdmF
sIG9mZmljZXIgSm9obiBOZXdsYW5kIE1hZmZpdHQgaXMgcmF0aGVyIHNob3J0IGZvciBpdHMgcHJpY2UgaXMgbm90IGFsdG9nZXRoZXIgaXRzIGF1dGhvcidzIGZhdWx0LiBNdWNoIG9mIHRoZSBtYXRlcmlhbCBvbiBNYWZmaXR0J3MgY2FyZWVyIGFzIGEgVS5TLiBuYXZhbCBvZmZpY2VyIG5vdGVkIGZvciBoaXMgc2NpZW50aWZpYyB3b3JrIGFuZCBoaXMgcHVyc3VpdCBvZiBzbGF2ZSByYWlkZXJzLCBhcyBjYXB0YWluIG9mIHRoZSBDb25mZWRlcmF0ZSByYWlkZXIiLCAiRmxvcmlkYSIsICIsIGFuZCBhcyBibG9ja2FkZSBydW5uZXIgcGFyIGV4Y2VsbGVuY2UgZGlkIG5vdCBzdXJ2aXZlIHRoZSBDaXZpbCBXYXIgaXRzZWxmLiBHaXZlbiB0aG9zZSBsaW1pdGF0aW9ucywgU2hpbmdsZXRvbidzIGlzIGEgcmVhZGFibGUsIGluZm9ybWF0aXZlIHBvcnRyYWl0IG9mIGFuIGFibGUsIGFwcGVhbGluZyBmaWd1cmUuIFNoaW5nbGV0b24ncyBwcm8tQ29uZmVkZXJhdGUgYmlhcyBpcyBhcyBtdWNoIGFuIGFzc2V0IGFzIGEgbGlhYmlsaXR5IGFzIGhlIHByb3ZpZGVzIGEgdGhvcm91Z2ggZGVsaW5lYXRpb24gb2YgdGhlIGV4dGVudCBvZiBDb25mZWRlcmF0ZSBtYXJpdGltZSBhY3Rpdml0eSBhbmQgaXRzIHBvdGVudGlhbCwgaGFkIGl0IGJlZW4gcHVyc3VlZCBtb3JlIHZpZ29yb3VzbHksIGZvciBhZmZlY3RpbmcgdGhlIGNvdXJzZSBvZiB0aGUgd2FyLiIsICJSb2xhbmQgR3JlZW4iLCAiUmV2aWV3IiwgIlwiSW4gdGhpcyB0aG9yb3VnaGx5IGRvY3VtZW50ZWQsIGFidW5kYW50bHkgYW5ub3RhdGVkIHN0dWR5LCBSb3ljZSBTaGluZ2xldG9uIGhhcyBicmlsbGlhbnRseSByZXN1c2NpdGF0ZWQgdGhlIGxpZmUgYW5kIHRpbWVzIG9mIGhpcyBoZXJvIGFuZCBib3RoIGFyZSBpbnRlcmVzdGluZywgaW5kZWVkIGNhcHRpdmF0aW5nLsKgIE1hZmZpdHQgYXMgYSBtYW4gd2FzIGEgc3ViamVjdCB3b3J0aHkgb2YgYSBjYXJlZnVsbHkgZG9jdW1lbnRlZCBzdHVkeSBzdWNoIGFzIHRoaXMsIGlmIG9ubHkgYXMgYW4gZXhhbXBsZSBvZiBvbmUgb2YgdGhlIG1hbnkgYW5kIGRpdmVyc2UgY29tcG9uZW50cyBvZiBtaWQtbmluZXRlZW50aCBBbWVyaWNhbiBzb2NpZXR5Li4uLlRoZSByZXN1bHQgaXMgYSBoaWdobHkgc2lnbmlmaWNhbnQgc3R1ZHkgbm90IG9ubHkgb2YgTWFmZml0dCdzIGNhcmVlciwgYnV0IG9mIHRoZSBhY3F1aXNpdGlvbiBvZiB3YXJzaGlwcyBpbiBHcmVhdCBCcml0YWluLMKgaG93IHRoZXkgd2VyZSBtYW5uZWQgYW5kIHN1cHBsaWVkLCB0aGVpciBvcGVyYXRpb25zIGFuZCB0aGUgcmVzdWx0cy4uLi5UaGUgYWZ0ZXJtYXRoIG9mIHRoZSB3YXIgZm9yIE1hZmZpdHTCoGlzIG5vIGxlc3MgaW50ZXJlc3RpbmcgdGhhbiBoaXMgc2VydmljZSBhZmxvYXQtLXdoaWNoIHdhcyBub3TCoGxpbWl0ZWQgdG8gdGhlIFtjb21tZXJjZSBkZXN0cm95ZXJdwqBGbG9yaWRhLsKgIFRoZSBTb3V0aCBhbmQgU291dGhlcm5lcnMgYXMgYSB3aG9sZcKgcGFpZCBhIHR
lcnJpYmxlIHByaWNlIGZvciB0aGVpciBzdWNjZXNzaW9uIGFuZCBNYWZmaXR0IHdhcyBubyBleGNlcHRpb24uwqAgU2hpbmdsZXRvbiB0ZWxscyB0aGUgc3RvcnkgYXMgaXQgd2FzLi4uYW5kIGxlYXZlcyBhIGxhc3RpbmcgaW1wcmVzc2lvbiBvZiBncmVhdCB0cmFnZWR5LCBvZiBubyBsZXNzIGhlcm9pc20sIG1pbmdsZWQgd2l0aCBhZG1pcmF0aW9uwqBmb3IgdGhlIHBlb3BsZSBjYXVnaHQgaW4gdGhlIHRocm9lcyBvZiBoaXN0b3J5IGluIHRoZSBtYWtpbmcuXCItLVVsYW5lIEJvbm5lbCwgUGFyaXMsIEZyYW5jZSwgVGhlIE5vcnRoZXJuIE1hcmluZXJcIlRoZSBoZXJvIG9mIEdXVFcgd2FzIFJoZXR0IEJ1dGxlciwgdGhlIGhhbmRzb21lIHNraXBwZXIgb2YgYSBDb25mZWRlcmF0ZSBibG9ja2FkZSBydW5uZXIsIGNvdXJhZ2VvdXMsIGNvb2wtaGVhZGVkLCBhbmQgYWJvdmUgYWxsLCBnYWxsYW50IHdpdGggdGhlIGxhZGllcy7CoCBXaXRoIHRoZSBkaWZmZXJlbmNlIHRoYXQgQnV0bGVyIHdhcyBhIGNpdmlsaWFuIHdoaWxlwqAuLi5NYWZmaXR0IHdhcyBhIENvbmZlZGVyYXRlIG5hdmFsIG9mZmljZXIsIE1hZmZpdHTCoGlzIFJoZXR0IEJ1dGxlciBtYWRlIGZsZXNoLi4uLlByb2Zlc3NvciBTaGluZ2xldG9uwqBbaGFzIG1hZGVdIGHCoGNvbXBsZXRlIGFuZMKgcmVhZGFibGUgYmlvZ3JhcGh5IG9mIHRoaXMgZW5nYWdpbmcsIGludGVyZXN0aW5nIG1hbi7CoCBBbnkgbmF2YWwgb2ZmaWNlciB3b3VsZCBiZSBkZWxpZ2h0ZWQgdG8gYmUgZGVzY3JpYmVkIGFzIE1hZmZpdHQgd2FzIGJ5IGhpcyBzaGlwbWF0ZXM6wqAgJ3RoZSB3YXJtZXN0LWhlYXJ0ZWQgYW5kIG1vc3QgZ2VuZXJvdXMgZnJpZW5kLi4udGhlIGxpZmUgb2YgdGhlIG1lc3MuLi5hIGJvcm4gc2FpbG9yIFtoZSB3YXMgYWN0dWFsbHkgwqBib3JuIGF0IHNlYV0gYW5kIGEgc3BsZW5kaWQgb2ZmaWNlcjsgZXF1YWxseSByZWFkeSBmb3IgYSBmaWdodCBvciBhIGpvbGxpZmljYXRpb24uwqAgQWxsIHRoZSBsYWRpZXMgYXJlIE1hZmZpdHQgbWFkLi4ud2hhdCBhIHZvaWNlIcKgIFdoYXQgd2l0IGFuZCBodW1vciEgRWF0IHlvdXIgaGVhcnQgb3V0IFJoZXR0IEJ1dGxlci5cIi0tSXJhIER5ZSwgVmlyZ2luaWEgQmVhY2gsIE1hcmluZXIncyBNaXJycm9yXCJUaGlzIGNvbXBhY3QgYmlvZ3JhcGh5IGlzIGluIG1hbnkgc2Vuc2VzIGEgbW9kZWwgd29yayBiZWNhdXNlIGl0IHN1Y2NpbmN0bHkgcmVjb3VudHMgdGhlIHN0b3J5IG9mIGl0IHByb3RhZ29uaXN0LCBwcm92aWRlcyB0aGUgYmFzaWMgY2hyb25vbG9neSBhbmQgZmFjdHMsIHNldHMgdGhlbSBpbiB0aGVpciBsYXJnZXIgbWF0cml4LCBhbmQgYWx3YXlzIGFsbG93cyB0aGUgc291cmNlcyB0byBzcGVhayBmb3IgdGhlbXNlbHZlcy7CoCBJbiBwYXJ0aWN1bGFyLCB0aGUgZGV0YWlscyBvZiB0aGUgbGlmZSBvZiB0aGUgeW91bmcgbmF2YWwgb2ZmaWNlciBhcmUgcHJlc2VudGVkIGluIGdyZWF0ZXIgZGV0YWlsIHRoYW4gZWxzZXdoZXJ
lLCB3aXRoIHNvbGlkIHVzZSBvZiB0aGUgTWFmZml0dCBsZXR0ZXJzIGFuZCBzZXJ2aWNlIHJlY29yZC4uLi5IaWdoIFNlYXMgQ29uZmVkZXJhdGUgd2lsbCB1bmRvdWJ0ZWRseSByZW1haW4gdGhlIHN0YW5kYXJkIGJpb2dyYXBoeSBvZiBKb2huIE5ld2xhbmQgTWFmZml0dCwgYW5kIGEga2V5IHZvbHVtZSBpbiBDb25mZWRlcmF0ZSBuYXZlbCBoaXN0b3Jpb2dyYXBoeSwgZm9yIHllYXJzIHRvIGNvbWUuXCItLUtlbm5ldGggSi4gQmx1bWUsIEFsYmFueSwgTmV3IFlvcmssIFRoZSBBbWVyaWNhbiBOZXB0dW5lXCJJbiB0aGlzIHN1Y2NpbmN0IGJpb2dyYXBoeSBvZiBKb2huIE5ld2xhbmQgTWFmZml0dCwgUm95Y2UgU2hpbmdldG9uIGRldm90ZXMgc2l4IG9mIHRoZSBmaWZ0ZWVuIGNoYXB0ZXJzIHRvIHRoZSBwcmUtYW5kIHBvc3QtQ2l2aWwgV2FyIGxpZmUgb2YgaGlzIHN1YmplY3QuwqAgVGhlIGJvb2ssIHRoZW4sIGlzIG1vcmUgdGhhbiBhIHN0dWR5IG9mIGEgQ29uZmVkZXJhdGUgbmF2eSBvZmZpY2VyLCBpdCBpcyBhIGNvbXBsZXRlIGJpb2dyYXBoeS7CoCBTaGluZ2xldG9uJ3MgdHJlYXRtZW50IG9mIE1hZmZpdHQncyBDaXZpbCBXYXIgY2FyZWVyIGlzIHRob3JvdWdowqBhbmQgd2VsbCBkb2N1bWVudGVkLi4uLkhpcyBiZXN0IGtub3duIGV4cGxvaXQgYWJvYXJkIHRoZcKgW0MuUy5TLl0gRmxvcmlkYSwgcnVubmluZyB0aHJvdWdoIHRoZSBVbmlvbiBibG9ja2FkZSBvZiBNb2JpbGUgYW5kIG91dCBhZ2FpbiwgaXMgZXhjaXRpbmdseSBhbmQgYWNjdXJhdGVseSBkZXNjcmliZWQ7IHRoZSBjcnVpc2Ugb2YgdGhlIEZsb3JpZGEgaXMgYWxzbyB3ZWxsIHByZXNlbnRlZC4uLi5IaWdoIFNlYXMgQ29uZmVkZXJhdGUgaXMgYSB3ZWxsLWRvY3VtZW50ZWQsIGludGVyZXN0aW5nLCBhbmQgc3VjY2luY3RseSB3cml0dGVuIGJpb2dyYXBoeSBvZiBKb2huIE5ld2xhbmQgTWFmZml0dCB0aGF0IHByZXNlbnRzIGhpbSBib3RoIGFzIGEgaHVtYW4gYmVpbmcgYW5kIGFzIGEgbmF2YWwgb2ZmaWNlci4uLi5mb3IgYWxsIENpdmlsIFdhciBidWZmcyBpdCBpcyBhIG11c3QuLi4uVGhpcyBvbmUgdGhvcm91Z2hseSBlbmpveWVkIGl0IVwiLS1XYXJyZW4gRi4gU3BlbmNlciwgRW1lcml0dXMsIFVuaXZlcnNpdHkgb2YgR2VvcmdpYSwgVGhlIEdlb3JnaWEgSGlzdG9yaWNhbCBRdWFydGVybHlcIk1vc3Qgb2YgdGhlIHJlY2VudCBwcm9maWxlcyBJIGhhdmUgcmVhZCBvZiBDaXZpbMKgIFdhciBzb2xkaWVycywgc2FpbG9ycywgYW5kIHBvbGl0aWNpYW5zLi4uLnRoZSBwcm90YWdvbmlzdCB3YW5kZXJzIHRocm91Z2ggdGhlIHBhZ2VzIG9mIHRoZSBib29rIC4uLndpdGhvdXQgZmlyZSwgc291bCwgb3IgbGlmZS7CoCBQcm9mZXNzb3IgUm95Y2UgU2hpbmdsZXRvbidzIG5ldyBiaW9ncmFwaHkgb2YgQ29uZmVkZXJhdGUgY29tbWVyY2UgcmFpZGVyIGFuZCBibG9ja2FkZSBydW5uZXIgSm9obiBOZXdsYW5kIE1hZmZpdHQgaXMgYSB3ZWxjb21lIGFudGlkb3RlIHR
vIHRoZSB1c3VhbCBmYXJlLsKgIEhpZ2ggU2VhcyBDb25mZWRlcmF0ZSBpcyBhIGZpcnN0LXJhdGUgbGl0dGxlIHZvbHVtZSwgdG9sZCB3aXRoIGEgc2Nob2xhcidzIHByZWNpc2lvbiBhbmQgdGhlIHZlcnZlIGFuZCBwYWNlIG9mIGEgc3Rvcnl0ZWxsZXIuwqAgQ29sb3JmdWwsIGRhc2hpbmcsIGFuZMKgJ2dhbWUgdG8gdGhlIGJvbmUnLCBNYWZmaXR0IHN0cmlkZXMgdGhyb3VnaCB0aGUgcGFnZXMgb2YgdGhpcyBib29rIGxpa2UgdGhlIGFjdGlvbiBoZXJvIGluIGEgbm92ZWwuwqAgVGhpcyBib29rIHNob3VsZCBhcHBlYWwgdG8gQ2l2aWwgV2FyIGVudGh1c2lhc3RzLCBzY2hvbGFycywgYW5kIHRob3NlIHdobyBlbmpveSBhIHJvbGxpY2tpbmcgdGFsZSBvZiB0aGUgc2VhLlwiLS1aYWNrIEMuIFdhdGVycywgUm9tZSwgR2VvcmdpYSwgVGhlIEZsb3JpZGEgSGlzdG9yaWNhbCBRdWFydGVybHkiLCAiRnJvbSB0aGUgQXV0aG9yIiwgIlRoaXMgaXMgbXkgdGhpcmQgYm9vaywgYW5kIHVubGlrZSBteSBzaXR1YXRpb24gd2l0aCB0aGXCoGZpcnN0IHR3bywgSSB3YXMgYXdhcmRlZCBhIGNvbnRyYWN0IHRvIHdyaXRlIGl0IGJ5IHRoZSBVbml2ZXJzaXR5IG9mIFNvdXRoIENhcm9saW5hIFByZXNzLsKgIFRoaXMgcmVzdWx0ZWQgZnJvbSB0aGUgcmVjb21tZW5kYXRpb24gb2YgV2lsbGlhbSBOLiBTdGlsbCwgSnIuLMKgd2hvIHdhcyB0aGUgR2VuZXJhbCBFZGl0b3Igb2YgdGhlIFByZXNzJyBTdHVkaWVzIGluIE1hcml0aW1lIEhpc3RvcnkgU2VyaWVzIGluIHdoaWNoIHRoZSBib29rIGFwcGVhcnMuwqAgSSBjYW1lIHRvIGtub3cgYW5kIHdvcmsgd2l0aCBEci4gU3RpbGwsIGFuZCBiZWxpZXZlIHRoYXTCoGhlIHdhcyBhIHJlYWRlciBvZiB0aGUgV29vZCBtYW51c2NyaXB0IGFuZCByZWNvbW1lbmRlZCBpdCB0byB0aGUgVW5pdmVyc2l0eSBvZiBHZW9yZ2lhIFByZXNzLCB3aGVyZSBJIGhhZCBzaG9wcGVkIHRoZSBtYW51c2NyaXB0IChzdWNoIHJlYWRlcnMgYXJlIG5vdCB1c3VhbGx5IHJldmVhbGVkKS7CoCBIYXZpbmcgYSBib29rIGNvbnRyYWN0IGlzIHBvc2l0aXZlIGluIHRoYXQgaXQgbmVnYXRlcyBzaG9wcGluZyBmb3IgYSBwdWJsaXNoZXIgYWZ0ZXIgdGhlIG1hbnVzY3JpcHQgaXMgY29tcGxldGVkLCBidXQgaGFzIGRyYXdiYWNrcyBpbiB0aGF0LCBhdCBsZWFzdCBpbiBteSBjYXNlLCB0aGUgY29udHJhY3QgY3JlYXRlZCBhIGRlYWRsaW5lIGFuZCBsaW1pdGVkIHRoZSBudW1iZXIgb2Ygd29yZHMuVGhlwqBNYWZmaXR0IGJvb2sgaXPCoHNvbWV3aGF0IGxpa2UgbXkgZmlyc3QsIHRoZSBXb29kIGJvb2ssIGluIHRoYXQgYm90aCB3ZXJlwqBDb25mZWRlcmF0ZSBuYXZhbCBvZmZpY2VycyB3aG8gYXR0YWNrZWQgVW5pb27CoGludGVyZXN0cyzCoGJ1dCBkaWZmZXJlbnQgaW4gdGhhdCBNYWZmaXR0IHdhcyBhIGhpZ2ggc2VhcyByYWlkZXIsIHdoaWxlIFdvb2Qgd2FzIGEgY29hc3RhbCByYWlkZXIuwqAgQWxsIHRocmVlIGJvb2tzIGFyZSBzZXQ
gaW4gdGhlIG1pZC1uaW5ldGVlbnRoIGNlbnR1cnksIGVzcGVjaWFsbHkgZHVyaW5nIHRoZSBwZXJpb2Qgb2YgdGhlIFdhciBCZXR3ZWVuIHRoZSBTdGF0ZXMswqB5ZXQgdGhlIFBldGVycyBib29rIGlzIHF1aXRlIGRpZmZlcmVudCBpbiB0aGF0wqB0aGUgc3ViamVjdMKgd2FzIGEgY2l2aWxpYW4gd2hvIGVuZ2FnZWQgaW4gYnVzaW5lc3MgYWN0aXZpdGllcy4iLCAiRnJvbSB0aGUgSW5zaWRlIEZsYXAiLCAiRmlyc3QgcHJpbnRpbmcgKDE5OTQpwqBzaG9ydCBibHVyYiBvZiB0aGUgYXV0aG9yLMKgYmVsb3cgd2hpY2jCoGlzIGFsc28gYSBwaG90byBvZiB0aGUgYXV0aG9yU2Vjb25kIHByaW50aW5nLCBzaG93biBoZXJlwqAoMTk5NSkgZmxhcCBkZXNpZ24gc2FtZSBhcyBmaXJzdCBwcmludGluZyIsICJGcm9tIHRoZSBCYWNrIENvdmVyIiwgIlwiTGl2ZWx5IGFuZCBpbnRlcmVzdGluZy4uLi5BZGRzIG11Y2ggZGVwdGggYW5kIGh1bWFuIGRyYW1hIHRvIHRoZSBhY3Rpdml0aWVzIG9mIGJsb2NrYWRlIHJ1bm5lcnMuXCItLUZyYW5rIEwuIE93c2xleSwgSnIuLCBhdXRob3Igb2YgdGhlIEMuUy5TLiBGbG9yaWRhOiBIZXIgQnVpbGRpbmcgYW5kwqAgT3BlcmF0aW9ucyIsICJcIkhpZ2ggU2VhcyBDb25mZWRlcmF0ZSB0YWtlcyByZWFkZXJzIGFib2FyZCBDb25mZWRlcmF0ZSBibG9ja2FkZS1ydW5uZXJzIGFuZCByYWlkaW5nIHZlc3NlbHMgdG8gbWVldCBvbmUgb2YgdGhlIENpdmlsIFdhcidzIG1vc3Qgc3VjY2Vzc2Z1bCBhbmQgY29sb3JmdWwgbmF2YWwgb2ZmaWNlcnMtLUpvaG4gTmV3bGFuZCBNYWZmaXR0LsKgIEluIHRoZSBmaXJzdCBtb2Rlcm4gYmlvZ3JhcGh5IG9mIHRoZSBzd2FzaGJ1Y2tsaW5nIGNhcHRhaW4gd2hvIHBlbmV0cmF0ZWQgRmVkZXJhbCBibG9ja2FkZXMgYW5kIHN3ZXB0IE5vcnRoZXJuIGNvbW1lcmNpYWwgc2hpcHMgZnJvbSB0aGUgQXRsYW50aWMsIFJveWNlIFNoaW5nbGV0b24gZGVtb25zdHJhdGVzIHRoYXQgTWFmZml0dCB3YXMgYSBzaWduaWZpY2FudC0taWYgcHJldmlvdXNseSB1bmhlcmFsZGVkLS1maWd1cmUgaW4gdGhlIENvbmZlZGVyYXRlIG5hdnkuwqDCoMKgwqAgVXNpbmcgdGhlIEpvaG4gTmV3bGFuZCBNYWZmaXR0IHBhcGVycyBmcm9tIHRoZSBTb3V0aGVybiBIaXN0b3JpY2FsIENvbGxlY3Rpb24sIFNoaW5nbGV0b24gdHJhY2VzIE1hZmZpdHQncyBjbGltYiBpbiByYW5rIGFuZCByZXB1dGF0aW9uIGFtb25nIHRoZSBDb25mZWRlcmF0ZSBvZmZpY2VycywgYmVnaW5uaW5nIHdpdGggaGlzIGNvbW1hbmQgb2YgdGhlIGd1bmJvYXQgQy5TLlMuIFNhdmFubmFoIGF0IHRoZSBvbnNldCBvZiB0aGUgd2FyIGFuZCBlbmRpbmcgd2l0aCBoaXMgY29tbWFuZCBvZiB0aGUgYmxvY2thZGUgcnVubmVyIE93bCBpbiAxODY0LTY1LsKgIER1cmluZyB0aGUgd2FyIE1hZmZmaXR0IGNvbW1hbmRlZCBzZXZlcmFsIG90aGVyIHNoaXBzIGluY2x1ZGluZyB0aGUgZmFtb3VzIEMuUy5TLiBGbG9yaWRhLCBjYXB0dXJlZCB0d2V
udHktdGhyZWUgbWVyY2hhbnQgdmVzc2VscywgYW5kIGNvbXBsZXRlZCBleHRyYW9yZGluYXJ5IHJ1bnMgaW50byB0aGUgcG9ydHMgb2YgV2lsbWluZ3RvbiwgTW9iaWxlLCBhbmQgR2FsdmVzdG9uLsKgIFNoaW5nbGV0b24gcHJhaXNlcyB0aGUgY291cmFnZSwgcXVpY2sgdGhpbmtpbmcsIHNlYW1hbnNoaXAsIGFuZCBuYXZpZ2F0aW9uYWwgc2tpbGwgdGhhdCBtYWRlIE1hZmZpdHQgZWZmZWN0aXZlIGluIGJhdHRsaW5nIGEgbXVjaCBsYXJnZXIgYW5kIGJldHRlci1lcXVpcHBlZCBmb2UuwqDCoE1hZmZpdHQncyBleHBsb2l0cyByZXZlYWwgaGlzIHZhbHVlIHRvIHRoZSBDb25mZWRlcmFjeSBhbmQgdGhlIGFkdmVudHVyZSBvZiBsaWZlIG9uIHRoZSBoaWdoIHNlYXMgZHVyaW5nIHRoZSBDaXZpbCBXYXIuXCIiLCAiQWJvdXQgdGhlIEF1dGhvciIsICJUaGUgTWFmZml0dCBib29rIHdvbiB0aGUgQ2xhcmVuZG9uIEN1cCwgYW4gYXdhcmQgZ2l2ZW4gYnkgdGhlIExvd2VyIENhcGUgRmVhciBIaXN0b3JpY2FsIFNvY2lldHkgYW5udWFsbHkgZm9yIHRoZSBiZXN0IGJvb2sgZGVhbGluZyB3aXRoIHRoZSBhcmVhLsKgIFRoZSBTb2NpZXR5IGlzIGJhc2VkwqBpbiBXaWxtaW5ndG9uLCBOQywgYSBwb3J0IGNpdHkgb24gdGhlIENhcGUgRmVhciBSaXZlciB0aGF0IE1hZmZpdHQgdmlzaXRlZCBhcyBhIGJsb2NrYWRlIHJ1bm5lciBkdXJpbmcgdGhlIHdhcizCoGFuZCB3aGVyZSBoZSBsYXRlciDCoHJldGlyZWQuwqAgSSBkZWNpZGVkLCB0b2dldGhlciB3aXRoIG15IHdpZmUsIHRvIGFwcGVhciBpbiBwZXJzb24gdG8gYWNjZXB0IHRoZSBhd2FyZC7CoCBGb2xsb3dpbmcgYSBicmllZiB0YWxrIHRvIHRoZSBTb2NpZXR5wqBhbmTCoHRoZSBhY2NlcHRhbmNlIG9mIMKgdGhlIGF3YXJkLCB3ZSB0aGVuIGhlbGQgYSBib29rIHNpZ25pbmcgc2Vzc2lvbi7CoCBXZSB3ZXJlIGFsc28gZ2l2ZW4gYSB0b3VyIG9mIHRoZSBjaXR5LCBkdXJpbmcgd2hpY2jCoHdlIHNhdyBhIHJpdmVyIGNydWlzZSBib2F0IG5hbWVkIHRoZcKgXCJKb2huIE4uIE1hZmZpdHQuXCLCoCBGaW5hbGx5IHdlIHdlcmUgaW52aXRlZCBieSBhIGRlc2NlbmRhbnQgb2YgTWFmZml0dCBpbnRvIGhlcsKgaGlzdG9yaWMgaG9tZcKgd2hlcmUgd2Ugdmlld2VkIGFuY2VzdHJhbCBwb3J0cmFpdHMsIGFuZCB3aGVyZcKgYXQgb25lIHBvaW50wqB0aGXCoGRlY2VuZGFudCB0b29rIGEgc3dvcmTCoGZyb20gdGhlIHdhbGwgYW5kIGhhbmRlZCBpdCB0byBtZSBmb3IgYSBmZXfCoG1vbWVudHMuwqAgSXQgd2FzIE1hZmZpdHQncyBzd29yZC4iLCAiUmVhZCBtb3JlIl0sICJwcmljZSI6IDMxLjAsICJpbWFnZXMiOiBbXSwgInZpZGVvcyI6IFtdLCAic3RvcmUiOiAiUm95Y2UgU2hpbmdsZXRvbiAoQXV0aG9yKSIsICJjYXRlZ29yaWVzIjogWyJCb29rcyIsICJIaXN0b3J5IiwgIkFtZXJpY2FzIl0sICJkZXRhaWxzIjogeyJQdWJsaXNoZXIiOiAiVW5pdmVyc2l0eSBvZiBTb3V0aCBDYXJvbGluYSBQcmVzczs
gRmlyc3QgRWRpdGlvbiAoQXByaWwgMSwgMTk5NCkiLCAiTGFuZ3VhZ2UiOiAiRW5nbGlzaCIsICJIYXJkY292ZXIiOiAiMjAwIHBhZ2VzIiwgIklTQk4gMTAiOiAiMDg3MjQ5OTg2MyIsICJJU0JOIDEzIjogIjk3OC0wODcyNDk5ODY3IiwgIkl0ZW0gV2VpZ2h0IjogIjE1LjIgb3VuY2VzIiwgIkRpbWVuc2lvbnMiOiAiNi41IHggMSB4IDkuMjUgaW5jaGVzIn0sICJwYXJlbnRfYXNpbiI6ICIwODcyNDk5ODYzIn0=,book-metadata,3,226161,2025-11-02T16:57:22.227Z,0,2025-11-02T17:02:23.920Z,2025-11-02
MzMxYWNhZGQ2ODMxM2U5NjljZGFiNjRiYjJiNjc5YmE=,eyJtYWluX2NhdGVnb3J5IjogIkJvb2tzIiwgInRpdGxlIjogIk5ldyBQZXJzcGVjdGl2ZXMgb24gSFRNTCwgWEhUTUwsIGFuZCBYTUw6IENvbXByZWhlbnNpdmUgKE5ldyBQZXJzcGVjdGl2ZXMgU2VyaWVzOiBXZWIgRGVzaWduKSIsICJhdmVyYWdlX3JhdGluZyI6IDQuMiwgInJhdGluZ19udW1iZXIiOiAxNywgImZlYXR1cmVzIjogWyJORVcgUEVSU1BFQ1RJVkVTIE9OIENSRUFUSU5HIFdFQiBQQUdFUyBXSVRIIEhUTUwsIFhIVE1MLCBBTkQgWE1MIHRlYWNoZXMgc3R1ZGVudHMgaG93IHRvIGNyZWF0ZSBzaW1wbGUgdG8gY29tcGxleCBXZWIgc2l0ZXMgZnJvbSBzY3JhdGNoIHVzaW5nIEhUTUwsIFhIVE1MLCBhbmQgWE1MLiJdLCAiZGVzY3JpcHRpb24iOiBbIkFib3V0IHRoZSBBdXRob3IiLCAiTXIuIFBhdHJpY2sgQ2FyZXkgcmVjZWl2ZWQgaGlzIE0uUy4gaW4gQmlvc3RhdGlzdGljcyBmcm9tIHRoZSBVbml2ZXJzaXR5IG9mIFdpc2NvbnNpbiwgd2hlcmUgaGUgd29ya2VkIGFzIGEgcmVzZWFyY2hlciBkZXNpZ25pbmcgYW5kIGFuYWx5emluZyBjbGluaWNhbCBzdHVkaWVzLiBIZSBjby1hdXRob3JlZCBoaXMgZmlyc3QgdGV4dGJvb2sgb24gdXNpbmcgRXhjZWwgYXMgYSBzdGF0aXN0aWNhbCB0b29sLiBUb2RheSwgTXIuIENhcmV5IGhhcyBhdXRob3JlZCBvciBjby1hdXRob3JlZCBtb3JlIHRoYW4gMjAgbGVhZGluZyBhY2FkZW1pYyBhbmQgdHJhZGUgdGV4dHMgZm9yIHRoZSBzb2Z0d2FyZSBpbmR1c3RyeS4iXSwgInByaWNlIjogNy4xOSwgImltYWdlcyI6IFtdLCAidmlkZW9zIjogW10sICJzdG9yZSI6ICJQYXRyaWNrIENhcmV5IChBdXRob3IpIiwgImNhdGVnb3JpZXMiOiBbIkJvb2tzIiwgIkNvbXB1dGVycyAmIFRlY2hub2xvZ3kiLCAiTmV0d29ya2luZyAmIENsb3VkIENvbXB1dGluZyJdLCAiZGV0YWlscyI6IHsiUHVibGlzaGVyIjogIkNlbmdhZ2UgTGVhcm5pbmc7IDNyZCBlZGl0aW9uIChNYXkgMTQsIDIwMDkpIiwgIkxhbmd1YWdlIjogIkVuZ2xpc2giLCAiUGFwZXJiYWNrIjogIjk5MiBwYWdlcyIsICJJU0JOIDEwIjogIjA0OTU4MDY0MDQiLCAiSVNCTiAxMyI6ICI5NzgtMDQ5NTgwNjQwMCIsICJJdGVtIFdlaWdodCI6ICI0LjM1IHBvdW5kcyIsICJEaW1lbnNpb25zIjogIjguNzUgeCAxLjI1IHggMTEgaW5jaGVzIn0sICJwYXJlbnRfYXNpbiI6ICIwNDk1ODA2NDA0In0=,book-metadata,3,226162,2025-11-02T16:57:22.234Z,0,2025-11-02T17:02:23.920Z,2025-11-02
MDg2YmIxYjJkZTk5MmVmMjNiZDA2Nzg2YmEwMjAxYTc=,eyJtYWluX2NhdGVnb3J5IjogIkJvb2tzIiwgInRpdGxlIjogIkdhbWVzIGF0IEhvbWU6IEEgZ3VpZGUgZm9yIGZhbWlseSBmdW4gdXNpbmcgaG91c2Vob2xkIGl0ZW1zIiwgImF2ZXJhZ2VfcmF0aW5nIjogNS4wLCAicmF0aW5nX251bWJlciI6IDYsICJmZWF0dXJlcyI6IFsiR2FtZXMgYXQgSG9tZSBpcyBhIERJWSBzdHlsZSBnYW1lIGJvb2sgZm9jdXNlZCBvbiBzdGlycmluZyBhbmQgbnVydHVyaW5nIHRoZSBjcmVhdGl2aXR5IGluIHVzIGFsbCwgYnV0IGVzcGVjaWFsbHkgYWltZWQgYXQgeW91bmcgY2hpbGRyZW4uIEluc2lkZSB5b3XigJlsbCBmaW5kIDMwIGdhbWVzIHRoYXQgZmFtaWxpZXMgY2FuIHBsYXkgdG9nZXRoZXIgdXNpbmcgc2ltcGxlIGFuZCBjb21tb24gaG91c2Vob2xkIGl0ZW1zLiBFYWNoIGdhbWUgaGFzIGF0IGxlYXN0IDIgdmFyaWF0aW9ucyBwbHVzIGEgbWluaW11bSBvZiAzIGdvYWxzIHRvIGFjY29tcGxpc2ggdG90YWxpbmcgdXAgdG8gYXQgbGVhc3QgMTgwIGNvbWJpbmF0aW9ucyBvZiBnYW1lcGxheSEgUGx1cyB5b3VyIGZyaWVuZHMgYW5kIGZhbWlseSB3aWxsIGhhdmUgZW5kbGVzcyBmdW4gY29taW5nIHVwIHdpdGggbmV3IGdhbWVzIG9mIHlvdXIgb3duLiBZb3UgbmV2ZXIga25vdyB3aGF0IHlvdSBjYW4gY29tZSB1cCB3aXRoISBFbmpveSEiXSwgImRlc2NyaXB0aW9uIjogWyJBYm91dCB0aGUgQXV0aG9yIiwgIkphbWVzIFJ5YW4gZ3JldyB1cCBhcyBhIHNpbmdsZSBjaGlsZCBpbiBTb3V0aGVybiBDYWxpZm9ybmlhIHRoYXQgbG92ZWQgdG8gZXhwbG9yZSBuZXcgYWR2ZW50dXJlcy4gRnJvbSBmb290YmFsbCB0byBza2F0ZWJvYXJkaW5nLCBub3RoaW5nIHdhcyBvZmYgbGltaXRzIHRvIGxlYXJuLiBIZSBlbnRlcmVkIHRoZSBtaWxpdGFyeSByaWdodCBhZnRlciBoaWdoIHNjaG9vbCBhbmQgaGFzIGJlZW4gYXJvdW5kIHRoZSB3b3JsZCBzZXZlcmFsIHRpbWVzLiBBZnRlciBnZXR0aW5nIG91dCBhbmQgbW92aW5nIHRvIExvcyBBbmdlbGVzIHRvIHB1cnN1ZSBhY3RpbmcsIGhlIGdvdCBpbnZvbHZlZCB3aXRoIHNjZW5lcyBmcm9tIGJlaGluZCB0aGUgY2FtZXJhLiBIZSBpcyBjdXJyZW50bHkgYSBmcmVlbGFuY2UgdGVsZXZpc2lvbiBwcm9kdWNlciBsaXZpbmcgaW4gTG9zIEFuZ2VsZXMgYnV0IGhhcyBhIG5ld2ZvdW5kIHBhc3Npb24gZm9yIHdyaXRpbmcgYW5kIGNyZWF0aW5nIGNoaWxkcmVuJ3MgZ2FtZSBib29rcy4gSGUgbG92ZXMgdG8gdHJhdmVsIGFuZCBsZWFybiBuZXcgc2tpbGxzLiBIZSBjYW4gYmUgcmVhY2hlZCBhdCBqYW1lc21yeWFuanJAZ21haWwuY29tIl0sICJwcmljZSI6IDUuMTcsICJpbWFnZXMiOiBbeyJsYXJnZSI6ICJodHRwczovL20ubWVkaWEtYW1hem9uLmNvbS9pbWFnZXMvSS81MXM1b2pjYUNMTC5fU1gzODVfQk8xLDIwNCwyMDMsMjAwXy5qcGciLCAidmFyaWFudCI6ICJNQUlOIn1dLCAidmlkZW9zIjogW10sICJzdG9yZSI
6ICJKYW1lcyBNaWNoYWVsIFJ5YW4gSnIuIChBdXRob3IpLCAgS3Jpc3RvcGhlciBXaGl0ZSAoRWRpdG9yKSwgIENpbmR5IFF1YWNoIChJbGx1c3RyYXRvciksICBUcmFjZXkgUnlhbiAoSWxsdXN0cmF0b3IpIiwgImNhdGVnb3JpZXMiOiBbIkJvb2tzIiwgIkh1bW9yICYgRW50ZXJ0YWlubWVudCIsICJQdXp6bGVzICYgR2FtZXMiXSwgImRldGFpbHMiOiB7IlB1Ymxpc2hlciI6ICJKYW1lcyBSeWFuOyAxc3QgZWRpdGlvbiAoSmFudWFyeSAyNywgMjAxNykiLCAiTGFuZ3VhZ2UiOiAiRW5nbGlzaCIsICJQYXBlcmJhY2siOiAiODggcGFnZXMiLCAiSVNCTiAxMCI6ICIwOTk4NjQyMjA3IiwgIklTQk4gMTMiOiAiOTc4LTA5OTg2NDIyMDgiLCAiSXRlbSBXZWlnaHQiOiAiOCBvdW5jZXMiLCAiRGltZW5zaW9ucyI6ICI4LjUgeCAwLjIgeCAxMSBpbmNoZXMifSwgInBhcmVudF9hc2luIjogIjA5OTg2NDIyMDcifQ==,book-metadata,3,226163,2025-11-02T16:57:22.238Z,0,2025-11-02T17:02:23.920Z,2025-11-02


main_category,title,average_rating,rating_number,features,description,price,images,videos,store,categories,details,parent_asin,bought_together
Books,California Contractor General Building (B) Exam: A Complete Prep Guide,4.2,244,"List(This book is the only one you will need to pass the 2023 California General Builder (B) Exam. We give you all of the knowledge necessary by spelling out the principles and concepts covered on the exam. In addition, and perhaps most importantly, the codes are translated into an easy-to-read format that does away with the long and often confusing wording found in the formal California Building Code. Whether you are a novice or possess a wealth of construction knowledge, this is the book you need to prepare yourself for the California General Builder (B) exam.)",List(),24.39,"List(Map(large -> https://m.media-amazon.com/images/I/51ZR+Hc-jDL._SX258_BO1,204,203,200_.jpg, variant -> MAIN))",List(),Contractor Education Inc. (Author),"List(Books, Crafts, Hobbies & Home, Home Improvement & Design)","Map(Dimensions -> 8.5 x 0.89 x 11 inches, Language -> English, Item Weight -> 2.01 pounds, ISBN 13 -> 978-1530838523, Publisher -> CreateSpace Independent Publishing Platform (March 31, 2016), Paperback -> 395 pages, ISBN 10 -> 1530838525)",1530838525,
Books,Reiki for Beginners: How to Heal Yourself with Reiki,4.3,27,"List(Would you like to learn how to heal yourself and help others?, Written by a Reiki Master Teacher who has attuned over 200 people around the world to Reiki,, Reiki for Beginners: How to Heal Yourself with Reiki, is a must-have guide to everything you need to know about this sacred Japanese healing art., Discover why this ancient energy healing system has gained mass popularity in recent times., Discover why this ancient energy healing system has gained mass popularity in recent times., Awaken your natural healing abilities., Awaken your natural healing abilities., Understand how Reiki heals mind, body and soul., Understand how Reiki heals mind, body and soul., Learn an extremely easy technique for deep relaxation and peace of mind... and much, much more., Learn an extremely easy technique for deep relaxation and peace of mind... and much, much more., As anyone who has received a Reiki healing treatment knows firsthand, Reiki is nothing short of life-changing. This magical energy has been known to heal everything from headaches and broken bones to cancer, anxiety and depression. Dogs and cats in particular strongly believe the world over should learn Reiki. ""Just imagine how many people could practice Reiki on me then!"" says Jazzy Blue, a dog from Southern California. (That is a direct quote, by the way.), What readers are saying..., ""As a real beginner, I learned so much from this easy-to-read and well-written book on Reiki....I signed up for Reiki One as soon as I finished it!"" Deborah Jacobs, ""Reiki for Beginners should be required reading in every hospital, every doctor's office, every school.... anyone with an interest in healing should read this book. You will be so happy you did."" Carmen Aguilera, ""The best Reiki book I've ever read."" Karen Johnson, Learn how to reduce stress and tap into your full potential with Reiki., A Reiki attunement will increase your psychic abilities. 
Teach you how to stay calm in this busy world of technological overload and to-do lists. Improve your life in ways you can't even imagine. Reiki will heal you on a soul level, remind you why you are here on Earth in these exciting times, and help you get in touch with your true purpose. Regardless of whether you want to learn Reiki for personal healing, to help others, or to send healing energy to the animals of our world who are so desperately in need, one thing is certain: once you become attuned to Reiki, your life will never quite be the same again. You will be stronger. More in touch with your POWER. Better able to stay centered no matter what happens around you. The time is now. Let's make this world a better place, one Reiki practitioner at a time., Are you ready to learn how to activate your healing abilities with Reiki?, Start today by scrolling to the top of the page and clicking the Buy button. We sure can use you!)",List(),11.11,"List(Map(large -> https://m.media-amazon.com/images/I/517heho1SGL._SX326_BO1,204,203,200_.jpg, variant -> MAIN))",List(),Brooke Betts (Author),"List(Books, Health, Fitness & Dieting, Alternative Medicine)","Map(Dimensions -> 5.25 x 0.31 x 8 inches, Language -> English, Item Weight -> 7.5 ounces, ISBN 13 -> 978-0692349878, Publisher -> Brooke Betts (April 25, 2015), Paperback -> 134 pages, ISBN 10 -> 0692349871)",692349871,
Books,"Build Powerful Nerve Force, Revised: It Controls Your Life-Keep It Healthy",4.7,96,"List(If you are suffering from stress overload, chronic fatigue, insomnia, depression or other related illnesses you really should read this book. Your nervous system is like the central wiring of your body, heart and soul and there are guidelines and specific steps in this book that will help you keep that system healthy. The book explains how it all works in easy-to-understand language and provides profound insights, such as how your gut influences your emotional health -- and idea that Paul and Patricia had years ago that is just now becoming the 'hot idea' among leading medical researchers. The book also covers spiritual health, natural foods and how to relax fully and experience recharging sleep that is so important to our health.)","List(Review, ""If you want to be inspired and encouraged on your healing journey this is a nice bedside companion. Patricia Bragg's heart is in the right place, and I'm sure that, in person, she is a really wonderful teacher."" ~ Dr. R.M. Young.""I have known the Bragg books for more than 25 years. They are a blessing to me and my family and to all who read them to help make this a healthier world."" ~ Pastor Mike Macintosh, Horizon Christian Fellowship.""Bragg books were my conversion to the healthy way,"" James F. Balch, M.D.,, Prescription for Nutritional Healing, .""I've been reading Bragg books since high school. I'm thankful for the healthy lifestyle and admire their health crusading to make this a healthier world!"" ~ Steve Jobs, creator of Apple Inc.""I am very biased towards any product by Paul and Patricia Bragg. Their philosophy about how to live a healthier life suits me."" ~ Katherine, Amazon review., From the Author, Dr. Patricia Bragg, who was recently called ""The Queen of Health,"" by singer, Katy Perry, has been leading advocate of health and healthy lifestyles for more than 60 years. 
This book is especially important to her because her father, Dr. Paul C. Bragg, the originator of health food stores in America in 1912, was also a pioneer in understanding and writing about the human nervous system. He took a 'mind-body' approach that was far ahead of its time and remains on the leading edge of our evolving consciousness about our health., From the Inside Flap, Millions of healthy, happy followers have learned how to control and increase their vital nerve force by reading this book. The important information on how to build a strong nervous system, prevent future damage and maintain its health over decades, is all in this book., From the Back Cover, Whether you are dealing with a nagging nervousness or anything all the way to fibromyalgia, anxiety and clinical depression, learning how to build a strong nerve force and then how to keep it strong over the years is critical to your health. The information that Paul and Patricia Bragg offer in this book show you how. It is one of ten 'self-health' books written by these best-selling authors., About the Author, Paul, and his daughter, Patricia, have been health pioneers for decades. When the 'engineered fast food' products of science and industry had captured the attention of most Americans, Paul C. Bragg campaigned for a diet and lifestyle that focused on natural live foods and a healthy regime for a vital and long life. These ideas, based around natural and organic foods, are gaining praise and acceptance world-wide. 
Patricia remains the dynamic CEO of Bragg Live Food Products, Inc., in Santa Barbara, California., Read more)",6.58,"List(Map(large -> https://m.media-amazon.com/images/I/31CB6GVE5YL._BO1,204,203,200_QL40_FMwebp_.jpg, variant -> MAIN))",List(),"Patricia Bragg (Author), Paul Bragg (Author)","List(Books, Health, Fitness & Dieting, Alternative Medicine)","Map(Dimensions -> 6 x 0.57 x 9.02 inches, Language -> English, Item Weight -> 12 ounces, ISBN 13 -> 978-0877900948, Publisher -> Bragg; Revised edition (April 2, 2002), Paperback -> 208 pages, ISBN 10 -> 0877900949)",877900949,
Books,Satan Exposed: Defeating the Powers of Darkness,4.1,15,"List(Bestselling Author Makes Spiritual Warfare Strategies Accessible to All, It seems strange to consider: The Creator of the universe is at war with one of his own creatures. The cosmic battle with evil is real, however, and spilling over into the lives of unsuspecting dwellers here on earth.In this evangelically friendly approach to spiritual warfare, Richards describes the invisible war raging around us. Through careful analysis and exceptional scriptural insight, he exposes the origin of evil, the demonic hierarchy, and Satan's current strategies. Richards then helps God's people go on the offensive. This revealing look at deliverance will help even the most cautious believers participate in Jesus's victory and move confidently to defeat the power of darkness in their own lives.)","List(From the Back Cover, YOU Can Win the Daily Battles against Evil, Most people want to overcome persistent struggles, but they keep hitting an invisible wall that they cannot see or understand. Are you stuck in that place of fear and frustration? You don't have to be!, In this practical, real-world guide, bestselling author Larry Richards reveals the truth about the origin of evil, the demonic hierarchy and the role Satan has played in both biblical history and our own lives. After exposing the enemy's motives and methods, Richards shares how you can participate in Christ's victory, shake off fear and go on the offensive. Packed with insight and hands-on battle-tested strategies, this book will help you claim victory, protect yourself and your family from Satan's schemes and restore your life.""An important book on the reality of Satan and evil in our world today. Larry Richards reveals the impact of the power of darkness and provides practical insights and applications to overcoming Satan's schemes in our daily lives.""--, S, amuel J. 
Voorhies, , president/CEO, Voorhies International Consulting, www.samvoorhies.com ""This book addresses real-life issues that each of us faces, explained with real-life illustrations. It is enjoyable and easy to read, but challenges us with eminently practical ways that we can make a difference where God has placed each of us."", --from the foreword by, C, raig, K, eener, , professor of biblical studies, Asbury Theological Seminary, Wilmore, Kentucky, About the Author, Larry Richards, , who has written more than two hundred books, holds a ThM in Christian education and a PhD in religious education and social psychology. A teacher, educator, and writer, he is currently a full-time author and speaker. His bestselling, Teen Study Bible, , which he wrote with his wife, Sue, has sold three million copies. Larry and Sue live in, Raleigh, North Carolina, .)",12.59,"List(Map(large -> https://m.media-amazon.com/images/I/51mu4UrT2AL._SX322_BO1,204,203,200_.jpg, variant -> MAIN))",List(),Larry Richards (Author),"List(Books, Christian Books & Bibles, Christian Living)","Map(Dimensions -> 5.5 x 0.44 x 8.5 inches, Language -> English, Item Weight -> 8.1 ounces, ISBN 13 -> 978-0800795863, Publisher -> Chosen Books (October 6, 2015), Paperback -> 192 pages, ISBN 10 -> 0800795865)",800795865,
Books,"Elizabeth of Bohemia: A Novel about Elizabeth Stuart, the Winter Queen",3.0,12,"List(A sweeping, cinematic novel about the life of the Winter Queen, Elizabeth Stuart October 1612. King James I is looking to expand England’s influence in Europe, especially among the Protestants. He invites Prince Frederic of the Palatinate to London and offers him his sixteen-year-old daughter Elizabeth’s hand in marriage. The fierce and intelligent Elizabeth moves to Heidelberg Castle, Frederic’s ancestral home, where she is favored with whatever she desires, and the couple begins their family. Amid much turmoil, the Hapsburg emperor is weakened, and with help from Bohemian rebels, Frederic takes over royal duties in Prague. Thus, Elizabeth becomes the Queen of Bohemia. But their reign is brief. Within the year, Catholic Europe unites to take back the Hapsburg throne. Defeated at the Battle of White Mountain, Frederic, Elizabeth, and their children are forced into exile for a much-reduced life in The Hague. Despite tumultuous seasons of separation and heartache, the Winter Queen makes every effort to keep her family intact. Written with cinematic flair, this historical novel brings in key figures such as Shakespeare and Descartes as it recreates the drama and intrigue of 17th-century England and the Continent. 
Elizabeth’s children included Rupert of the Rhine and Sophia of Hanover, from whom the Hanoverian line descended to the present Queen Elizabeth II.)","List(Review, “A wonderfully scripted and entertaining read from cover to cover, Elizabeth of Bohemia is certain to be an immediate and enduringly popular addition to community library Historical Fiction collections.” ― Midwest Book Review -- Kirkus Reviews, From the Back Cover, “Elias’s use of language to re-create the period is striking… A highly readable telling of a royal fall from grace.” ― Kirkus Reviews “Lively and engrossing… Rich with historical detail and political intrigue, Elizabeth of Bohemia is a complex portrait of a reluctant yet captivating queen.” ― Foreword Reviews October 1612. King James I seeks to expand England’s influence in Europe and offers Prince Frederic of the Palatinate his sixteen-year-old daughter Elizabeth’s hand. The fierce and intelligent Elizabeth moves to Heidelberg Castle with her new husband, where she turns a daughter’s duty into a wife’s ambition. When the Hapsburg emperor is weakened, Elizabeth encourages Frederic to take over the royal duties in Prague, and in the process she becomes Queen of Bohemia. But the reign is brief. Within the year, Catholic Europe unites to take back the Hapsburg throne. Frederic, Elizabeth, and the children are forced to flee, and the exiled queen must summon all her strength to keep her family intact through tumultuous seasons of separation and heartache in The Hague. With richly rendered characters and dialogue both penetrating and nuanced, Elizabeth of Bohemia offers a rare and delightful window into the Stuart period. David Elias is an author based in Winnipeg, Manitoba. He travelled extensively in the footsteps of Elizabeth of Bohemia to visit historical sites and examine artifacts in places such as the British Library, Heidelberg Castle, and St. 
Vitus Cathedral in Prague., About the Author, David Elias is a novelist based in Winnipeg, Manitoba. His work has been nominated for several awards, and he has travelled extensively in the footsteps of Elizabeth of Bohemia to examine rare historical documents and artifacts in places such as the British Library, Heidelberg Castle, and St. Vitus Cathedral in Prague., Read more)",16.95,"List(Map(large -> https://m.media-amazon.com/images/I/51137wRK0SL._SX322_BO1,204,203,200_.jpg, variant -> MAIN))",List(),David Elias (Author),"List(Books, Literature & Fiction, Genre Fiction)","Map(Dimensions -> 5.5 x 0.82 x 8.5 inches, Lexile measure -> 1100L, Language -> English, Item Weight -> 15.2 ounces, ISBN 13 -> 978-1770414631, Publisher -> ECW Press (June 4, 2019), Paperback -> 360 pages, ISBN 10 -> 1770414630)",1770414630,


In [0]:
# Report the shape of the transformed items frame: row count first, then column count.
items_row_count = df_items_transformed.count()
print(f"Tổng số hàng: {items_row_count}")
items_column_count = len(df_items_transformed.columns)
print(f"Tổng số cột: {items_column_count}")

Tổng số hàng: 4448180
Tổng số cột: 14


In [0]:
# Frequency of each distinct price value, most common first (top 50 printed).
price_frequencies = df_items_transformed.groupBy("price").count()
price_frequencies.orderBy(col("count").desc()).show(50)

print("Kiểm tra số lượng NULL trong tất cả các cột của 'items':")

# Build one SQL aggregate per column. The CASE yields 1 for a NULL cell and
# NULL otherwise, and count() skips NULLs, so each output column
# `null_<name>` holds the number of NULL cells in <name>.
# NOTE(review): column names are spliced into the SQL text unquoted, so this
# assumes every column name is a plain identifier.
null_counts_expr = []
for col_name in df_items_transformed.columns:
    null_counts_expr.append(
        f"count(case when {col_name} is null then 1 end) as null_{col_name}"
    )

null_counts_per_column = df_items_transformed.selectExpr(*null_counts_expr)
display(null_counts_per_column)

+-----+------+
|price| count|
+-----+------+
| NULL|616601|
|    —|230503|
|  0.0| 98202|
| 9.99| 83806|
|14.99| 62196|
|12.99| 51534|
| 7.99| 46439|
| 6.99| 38105|
|19.99| 36708|
| 5.99| 36443|
| 8.99| 35826|
|14.95| 34935|
|11.99| 32725|
| 4.95| 32416|
|15.99| 29666|
|16.99| 28314|
|19.95| 27271|
|10.99| 26763|
|13.99| 25648|
| 4.99| 24017|
|12.95| 23236|
| 15.0| 22785|
| 9.95| 21620|
|17.99| 20995|
| 2.99| 19106|
|16.95| 18891|
| 10.0| 17333|
| 5.97| 16748|
| 3.99| 15920|
|15.95| 15705|
|24.95| 15166|
| 20.0| 14659|
| 12.0| 14137|
|24.99| 13941|
| 25.0| 12381|
|29.95| 12263|
|18.99| 12181|
|17.95| 12098|
|29.99| 11695|
|11.95| 10851|
|21.99| 10498|
|13.95| 10404|
| 16.0| 10018|
| 0.99|  9681|
|18.95|  9653|
| 18.0|  9028|
| 7.95|  8862|
| 8.95|  8678|
|10.95|  8265|
| 6.95|  8227|
+-----+------+
only showing top 50 rows
Kiểm tra số lượng NULL trong tất cả các cột của 'items':


null_main_category,null_title,null_average_rating,null_rating_number,null_features,null_description,null_price,null_images,null_videos,null_store,null_categories,null_details,null_parent_asin,null_bought_together
358,0,0,0,0,0,616601,0,0,174454,0,0,0,4448180


In [0]:
# Print the 50 most frequent values of each column below, most common first.
for profiled_column in ("store", "main_category"):
    value_frequencies = df_items_transformed.groupBy(profiled_column).count()
    value_frequencies.orderBy(col("count").desc()).show(50)

+--------------------+------+
|               store| count|
+--------------------+------+
|                NULL|174454|
|         aa (Author)|  3179|
|    Various (Author)|  3025|
|Willow Creek Pres...|  2746|
|      German Edition|  2128|
|    Japanese Edition|  2009|
|     Spanish Edition|  1912|
|         DK (Author)|  1593|
|DK Publishing (Au...|  1382|
|Hal Leonard Corp....|  1296|
|Peter Pauper Pres...|  1210|
|Charles River Edi...|  1207|
|MegaCalendars (Au...|  1192|
|  Zondervan (Author)|  1167|
|Middle English Ed...|  1144|
|    Unknown (Author)|  1143|
|Nora Roberts (Aut...|  1141|
|      French Edition|  1090|
|Trends Internatio...|  1030|
|Agatha Christie (...|  1024|
|BrownTrout Publis...|   993|
|  Anonymous (Author)|   982|
|Hal Leonard Corp....|   940|
|    Fodor's (Author)|   905|
|Stephen King (Aut...|   904|
|Roger Priddy (Aut...|   902|
|Thomas Nelson (Au...|   813|
|William Shakespea...|   738|
|Rand McNally (Aut...|   707|
| Scholastic (Author)|   688|
|National 

Sample

In [0]:
from pyspark.sql.functions import col, regexp_extract, when
from pyspark.sql.types import DoubleType

In [0]:
# Step 1: pull the first run of digits/dots out of the raw price string.
# regexp_extract gives an empty string when nothing matches.
# NOTE(review): only the first digit/dot run is kept, so a value with a
# thousands separator (e.g. "1,299.00") would become "1" — TODO confirm such
# values do not occur in the bronze data.
df_with_price_cleaned = df_items_transformed.withColumn(
    "price_cleaned", regexp_extract(col("price"), r"([\d\.]+)", 1)
)

# Step 2: replacement expressions for the three columns being normalised.
# Empty extraction -> NULL, otherwise the extracted text cast to double.
price_as_double = when(col("price_cleaned") == "", None).otherwise(
    col("price_cleaned").cast(DoubleType())
)
# The placeholder store value "n/a (Author)" is mapped to NULL.
store_or_null = when(col("store") == "n/a (Author)", None).otherwise(col("store"))
# Empty-string categories are mapped to NULL.
main_category_or_null = when(col("main_category") == "", None).otherwise(
    col("main_category")
)

# Step 3: add the cleaned columns under temporary names, drop the originals
# (and the helper column), then rename back. This places price, store and
# main_category at the end of the schema.
# Rows without a parent_asin or a title are discarded, and finally one row is
# kept per parent_asin.
# NOTE(review): which row dropDuplicates keeps for a repeated parent_asin is
# presumably arbitrary — confirm if that matters downstream.
df_items_silver = (
    df_with_price_cleaned
    .withColumn("price_final", price_as_double)
    .withColumn("store_final", store_or_null)
    .withColumn("main_category_final", main_category_or_null)
    .drop("price", "price_cleaned", "store", "main_category")
    .withColumnRenamed("price_final", "price")
    .withColumnRenamed("store_final", "store")
    .withColumnRenamed("main_category_final", "main_category")
    .filter(col("parent_asin").isNotNull() & col("title").isNotNull())
    .dropDuplicates(["parent_asin"])
)

In [0]:
# Row count of the silver frame, i.e. after its null filters and the
# de-duplication on parent_asin.
silver_row_count = df_items_silver.count()
print(f"Count row after drop duplicate asin: {silver_row_count}")

Count row after drop duplicate asin: 4448180


In [0]:
# Persist the cleaned items as the silver table, replacing existing contents.
# NOTE(review): "overwriteSchema" is presumably the Delta Lake option that lets
# the stored schema be replaced as well — confirm the target table is Delta.
silver_items_writer = (
    df_items_silver.write
    .mode("overwrite")
    .option("overwriteSchema", "true")
)
silver_items_writer.saveAsTable("`bigdata-and-bi`.silver.items_clean")

In [0]:
# Print a header line, then render a 10-row preview of the silver frame.
print("--- Hiển thị 10 dòng kết quả (items_clean): ---")
items_clean_preview = df_items_silver.limit(10)
display(items_clean_preview)

--- Hiển thị 10 dòng kết quả (items_clean): ---


title,average_rating,rating_number,features,description,images,videos,categories,details,parent_asin,bought_together,price,store,main_category
The Devil Can Ride: The World's Best Motorcycle Writing,4.1,9,"List(Authors such as Hunter Thompson, Robert Pirsig, and Mark Singer have written about the motorcycle, that icon for outlaws, rebels, thieves, and beat poets. This collection of motorcycle tales features the best of the vast collection of motorcycle writing created since old Gottlieb Daimler first bolted a crude internal-combustion engine to his wooden two-wheeled Einspur in 1876. In addition to essays from Thompson and Pirsig, The Devil Can Ride features works by Peter Egan, T.E. Lawrence, James Stevenson, Jamie Elvidge, John Hall, and Kevin Cameron.)","List(Review, “This is a book that can be opened to almost any page and deliver the kind of inspiration that fuels our passion for the people and machines in motorcycling.” –, Cycle World, October 2010, About the Author, Edited by magazine journalist and author Lee Klancher. Klancher’s work has appeared in, Motorcyclist, Dirt Rider, Motorcycle Cruiser, ATV Rider, , and, Motorcycle Escape, magazines. Motorbooks published his most recent work,, Motorcycle Dream Garages, in 2009. Klancher has been riding, racing, and crashing motorcycles since he was 11 years old, and reading since he was three. He lives in Austin, TX.)","List(Map(large -> https://m.media-amazon.com/images/I/51om0aJQzPL._SX339_BO1,204,203,200_.jpg, variant -> MAIN))",List(),"List(Books, Engineering & Transportation, Automotive)","Map(Dimensions -> 6 x 1.06 x 9 inches, Language -> English, Item Weight -> 10.4 ounces, Hardcover -> 304 pages, ISBN 13 -> 978-0760334775, Publisher -> Motorbooks; First Edition (June 12, 2010), ISBN 10 -> 0760334773)",0760334773,,30.0,"Lee Klancher (Author), Kevin Cameron (Contributor), Jack Lewis (Contributor), Hunter S. Thompson (Contributor), Robert Pirsig (Contributor)",Books
Blessings,4.2,172,"List(At thirty-six, Jennie Rakowsky's dreams were coming true. She was about to marry a wonderful man, her career as a lawyer was skyrocketing, and she had never been more beautiful. And then the secret she had hidden for nineteen years threatened to shatter it all.)","List(Review, ""Entertaining !""—, Washington Post, ""Belva Plain's mixture of romance, suspense, and deeply felt familial conflicts should leave her fans well entertained.""—, Publishers Weekly, About the Author, Belva Plain lives in northern New Jersey. She is the author of the bestselling novels, Evergreen, ,, Random Winds, ,, Eden Burning, ,, Crescent City, ,, The Golden Cup, ,, Tapestry, ,, Blessings, ,, Harvest, ,, Treasures, ,, Whispers, ,, Daybreak, ,, The Carousel, ,, Promises, ,, Secrecy, ,, Homecoming, ,, Legacy of Silence, , and, Fortune's Hand, ., Excerpt. © Reprinted by permission. All rights reserved., Chapter One, The day on which the sky cracked open over Jennie's head had begun as gladly as any other day in that wonderful year. It had been the best year of her life until then.At noon she had been standing with Jay on the lip of the hill that overlooked the wild land called, by the town to which it belonged, the Green Marsh. It was one of those Indian summer intervals, when, after two weeks of rain and premature gray cold, everything suddenly burns again; the distant air burns blue and the near oaks flare red; in the marsh, cattails and spreading juniper glisten darkly after the night's rain. Canada geese come streaming, honking their long way to the south; and ducks, with a great flapping racket, splash into the pond.""You see, it's not all marsh,"" Jay explained. ""There's meadow and forest at the other end. Over a thousand acres, all wild. Been here for Lord knows how many thousand of years, just as you see it, untouched. We're trying to get the state to take it over as part of the wilderness system. That way it'll be safe forever. 
But we've got to hurry before the New York builders put their bid through.""""Do you suppose they'll be able to?""""God, I hope not. Imagine ruining all this!""They stood for a little while listening to the silence. Totally at ease, accustomed as they were to quiet hours with each other, they felt no need for a continuous flow of speech.A small sudden wind blew a dry shower of leaves, and at the bottom of the hill Jay's children came into sight, running with the wind. They made themselves fall, the two girls rolling their little brother in the leaves. They shrieked; the dog barked; and the wind, carrying the sounds back up the hill, shattered the Sunday peace.""Darling,"" Jay said.Turning to him, Jennie knew that he had been watching her while she watched his children.""I'm happier than anyone has a right to be,"" she murmured.He searched her face with such intensity, such love, that she felt an ache in her throat.Oh, Jennie, I can't tell you . . . You give me . . ."" He threw out his arms to encompass the whole bright scene in one characteristic, generous gesture. ""I never thought . . ."" Not finishing, he put his arms around her shoulders and drew her close.Into the curve of his arm she settled, feeling a perfect happiness. Memory ran backward to the beginning of this miracle. A year and a half before, when they had first met, Jay had been a widower for two years, his young wife having died most terribly of cancer. He had been left with two small girls and an infant son, a rather grand Upper East Side apartment, and a partnership in one of New York's most prestigious law firms, a position not inherited as sometimes happens, but earned through merit and hard effort. One of the first things Jennie had observed about Jay had been a strained expression that might signify anxiety, overwork, loneliness, or all of these. 
Certainly if loneliness was a problem, the city had enough desirable young women to fill a man's vacant hours, especially those of a tall young man with vivid eyes and a charming cleft in his chin. When she knew him better, she understood that he had been very, very careful about involvements because of his children. Some of his friends had asked her whether she didn't find his devotion to the children a bore or a hindrance; on the contrary, she admired it, was glad of it, and would have thought less of him if he had not felt a loving, deep responsibility toward them.She turned her face up now to see his. Yes, the look of strain was definitely gone, along with that nervous habit of pulling a strand of hair at his temple, and along with smoking too much and sleeping too little. Indeed, this last month he had stopped smoking altogether. Smiles came easily now, and certainly he looked much younger than thirty-eight.""What are you staring at, woman?""""I like you in plaid shirts and jeans.""""Better than in my Brooks Brothers vest?""""I like you best in nothing at all, since you ask.""""Same to you. Listen, I was thinking just now, would you like to have a little summer place up around here? We could build something at the far end of my parents' property, or somewhere else, or not at all. You choose.""""I can't think. I've never had so many choices in my life!""""It's time you had some, then.""She had never been one who craved choices. In her mind she stripped things bare to the core, and the core now was just her pure need to be with Jay always and forever; houses, plans,, things, –all were unimportant beside that need.""Have you decided where you want the wedding? Mother and Dad would be glad to have it at their apartment. Mother said she's already told you.""A woman was supposed to be married from her own house. But when the home consisted of two cramped rooms in a renovated walk-up tenement, even the simplest ceremony presented a problem. 
Obviously Jay's mother understood that, although with kindest tact she had not referred to it.""Yes. It was a lovely offer."" But in Jay's apartment, Jennie thought, it would seem a little bit like her own home. ""I'd like your place. Would that be all right? Since that's where I'm going to be living?""""I'd love it, darling. I was hoping you'd want to. So, now that's settled. One thing more and we'll be all settled. What about your office? Do you want to stay where you are or come to my firm's building? There's going to be some available space on the fifteenth floor.""""Stay where I am, Jay. My clients would be intimidated, scared to death on Madison Avenue. All my poor, broken-down women with their miserable problems and their shabby clothes . . . It would be cruel. Besides, I couldn't afford a move like that, anyway.""Jay grinned and ruffled her hair. ""Independent cuss, aren't you?""""When it comes to my law practice, yes,"" she answered seriously.She supposed that his practice must mean as much to him as hers did to her. After all, why else would he have chosen it and stayed in it? But she couldn't imagine anyone, certainly not herself, caring as deeply about wills and trusts and litigation over money as about people–the battered wives, abused children, dispossessed families, and all the other pitiable souls who came asking for help. Yet no one could be more kind and caring than Jay. And money, after all, did grease the world's wheels, didn't it? Obviously, then, somebody had to take care of it.At the foot of the hill they could see the setter's tail waving above dead weeds. The children were now stooped over.""What on earth are they doing?"" Jay asked.""Collecing leaves. I bought scrapbooks for Sue and Emily to take to science class.""""You think of everything! They're going to love you, Jennie. They do already."" He looked at his watch. ""Hey, we'd better call them. 
My mother's having an early lunch, so we can get back to the city by their bedtime.""The two-lane blacktop road passed dairy farms and apple growers' wide, level spreads: little old houses with battered swings on front porches stood close to big red barns; horses in their shabby winter coats drooped their heads over wire fences; here and there a glossy white-painted house at the end of a gravel drive bordered with rhododendrons and azaleas proclaimed ownership by some local banker or, more likely still, by some city family who enjoyed its two or three summer months of rural peace.""I can't believe my noisy little rooms in New York are only hours away,"" Jennie said.When the winter-brown fields gave way to the town, they entered the main street. Here chain stores, gas stations, a bowling alley, a pizza parlor, a redbrick consolidated high school, a Ford dealership, a dingy movie theater, and three or four new, low office buildings reflected modern times, while a saddlery, a volunteer fire department, and a feed store with a sign above the front entry–FOUNDED 1868–spoke of a life that had been and was now changing.""As I remember it, the town was half this size when Dad bought our place,"" Jay remarked.""Do you think of this as your true home?""""Not yet. Maybe someday when I'm my parents' age. You know, I wouldn't be surprised if they were to give up their New York apartment and stay here all year, now that Dad's selling the factory and retiring.""Mrs. Wolfe was spreading compost over a rose bed at the side of the house when they drove up. She straightened, took off her gardening gloves, and spread her arms to the little boy, who ran into them.""Did you have a good ride, Donny? Did you see the horses?""The girls interrupted. ""We went to the academy, but Donny didn't want to get on the pony.""""Daddy promised us chocolate bars, but the stores were all closed.""""A good thing, too, or you wouldn't eat any lunch. 
And we've a beautiful chocolate cake for dessert."" The grandmother smiled at Jennie. ""I hope we haven't tired you out this weekend.""""No, Mrs. Wolfe, I could walk ten miles a day through these hills.""""Well, I'm sure Jay will take you up on that sometime. Let's go in, shall we?""Jennie stepped aside to let the other woman precede her into the house. She must be careful to remember every little nicety. . . .It was only natural to feel unease in the presence of one's future husband's parents, wasn't it? Especially when this was her first visit after only two previous meetings, and those in the impersonal setting of a restaurant. Enid Wolfe, for all her welcoming manner, possessed an elegance that easily could be daunting. Even in her gingham shirt and denim skirt, she had it with..., Read more)","List(Map(large -> https://m.media-amazon.com/images/I/51BH67JQSEL._SY291_BO1,204,203,200_QL40_FMwebp_.jpg, variant -> MAIN))",List(),"List(Books, Romance, Historical)","Map(Dimensions -> 4.17 x 1.03 x 6.89 inches, Language -> English, Item Weight -> 6.9 ounces, ISBN 13 -> 978-0440243250, Publisher -> Dell (June 24, 2008), Mass Market Paperback -> 400 pages, ISBN 10 -> 0440243254)",0440243254,,6.47,Belva Plain (Author),Books
The War That Killed Achilles: The True Story of Homer's Iliad and the Trojan War,4.6,245,"List(""Spectacular and constantly surprising."" -Ken Burns, Written with the authority of a scholar and the vigor of a bestselling narrative historian,, The War That Killed Achilles, is a superb and utterly timely presentation of one of the timeless stories of Western civilization. As she did in, The Endurance, and, The Bounty, ,, New York Times, bestselling author Caroline Alexander has taken apart a narrative we think we know and put it back together in a way that lets us see its true power. In the process, she reveals the intended theme of Homer's masterwork-the tragic lessons of war and its enduring devastation.)","List(Review, ""In her spectacular and constantly surprising new book, Caroline Alexander has taken the 'original' war book and turned it upside down, making it, as all wars are, an excruciating story of loss..., The War that Killed Achilles, is a triumph."" -Ken Burns ""This riveting tale of ancient wars, legendary warriors, and mythical gods is at once a great adventure story and a cautionary tale of the enduring perils of hubris and ego. 
Achilles' life and death are instructive lessons for all of us today."" -Tom Brokaw ""Spirited and provocative...a nobly bold even rousing venture...it would be hard to find a faster, livelier, more compact introduction to such a great range of recent Iliadic explorations."" -Steve Coates,, The New York Times, ""Penetrating...reflecting her own skills [Alexander] provides her own translation of an entire chapter...a real bonus for the reader, comparing favorably with Lattimore and Fagles."" -, Boston Globe, About the Author, Caroline Alexander has written for, The New Yorker, ,, Granta, ,, Condé Nast Traveler, ,, Smithsonian, ,, Outside, , and, National Geographic, and is the author of four previous books.)","List(Map(large -> https://m.media-amazon.com/images/I/511xI4P2kXL._SY291_BO1,204,203,200_QL40_FMwebp_.jpg, variant -> MAIN))",List(),"List(Books, Literature & Fiction, History & Criticism)","Map(Dimensions -> 5.52 x 0.7 x 8.4 inches, Language -> English, Item Weight -> 9.6 ounces, ISBN 13 -> 978-0143118268, Publisher -> Penguin Books; Reprint edition (September 28, 2010), Paperback -> 320 pages, ISBN 10 -> 9780143118268)",0143118269,,11.18,Caroline Alexander (Author),Books
"Mastering machine code on your ZX81 by Baker, Toni (1982) Paperback",5.0,2,"List(Baker, Toni)",List(),"List(Map(large -> https://m.media-amazon.com/images/I/01RmK+J4pJL._BO1,204,203,200_.gif, variant -> MAIN))",List(),"List(Books, Science & Math, Mathematics)","Map(Language -> English, Item Weight -> 9.9 ounces, ISBN 13 -> 978-0835942614, Publisher -> Reston Pub. Co (January 1, 1982), Paperback -> 180 pages, ISBN 10 -> 0835942619)",0835942619,,63.07,Toni Baker (Author),Books
Coaching Youth Lacrosse,4.5,8,"List(This new edition of, Coaching Youth Lacrosse, is part of the improved generation of the American Sport Education Program's (ASEP), Coaching Youth Sports, series. A widely respected and highly popular series, this is the best collection of youth sport-specific guides, which are grounded in positive coaching principles., ASEP, the nation's No. 1 coaching education program, developed, Coaching Youth Lacrosse, to provide coaches with both an explanation of their role and concrete instructions on fulfilling that role., Coaching Youth Lacrosse, contains specific programs for both boys and girls lacrosse, including specifics on equipment, season plans, games and tactics. You will find chapters on communicating with your athletes and their parents, teaching and developing lacrosse skills, planning and conducting practices, and coaching during games., This second edition includes a special chapter on the games approach to coaching lacrosse, which makes practice more fun for the kids and teaching more effective for you, the coach.)","List(Review, "", """"Coaching Youth Lacrosse, should be under the arm of every youth lacrosse coach in the country! This book can help veteran coaches increase their skills and give novice coaches the guidance they need to coach lacrosse with confidence."""", Steve StenersenExecutive DirectorUS Lacrosse, "", From the Publisher, ""Coaching Youth Lacrosse should be under the arm of every youth lacrosse coach in the country! This book can help veteran coaches increase their skills and give novice coaches the guidance they need to coach lacrosse with confidence."", Steve Stenersen Executive Director US Lacrosse, About the Author, The, American Sport Education Program (ASEP), is the most widely used and respected sport education program in the United States. More than 30 states now require ASEP courses for their high school coaches, and more than 200 universities use ASEP courses and resources. 
Over one million people—coaches, parents, and directors alike—have used ASEP products since the program began in 1981. The ASEP headquarters is located in Champaign, Illinios., Read more)","List(Map(large -> https://m.media-amazon.com/images/I/51wIAsnNZzL._SX319_BO1,204,203,200_.jpg, variant -> MAIN))",List(),"List(Books, Sports & Outdoors, Other Team Sports)","Map(Dimensions -> 6 x 0.75 x 9 inches, Reading age -> 18 years and up, Language -> English, Item Weight -> 1.06 pounds, ISBN 13 -> 978-0736037945, Publisher -> Human Kinetics; 2nd edition (March 1, 2007), Paperback -> 296 pages, ISBN 10 -> 0736037942)",0736037942,,16.99,American Sport Education Program (Author),Books
A Kiss Is Still a Kiss,4.6,4,"List(The Pulitzer Prize-winning film critic and co-host of television's ""At the Movies"" offers an inside look at the film industry and its stars, power brokers, festivals, writers, producers, directors, and films)",List(),"List(Map(large -> https://m.media-amazon.com/images/I/51do7Wc9ZzL._SX332_BO1,204,203,200_.jpg, variant -> MAIN))",List(),"List(Books, Humor & Entertainment, Movies)","Map(Language -> English, Item Weight -> 1.3 pounds, Hardcover -> 256 pages, ISBN 13 -> 978-0836279573, Publisher -> Andrews McMeel Pub; First Edition (January 1, 1984), ISBN 10 -> 0836279573)",0836279573,,6.4,Roger Ebert (Author),Books
Marta Martinez Saves the World (Kaiju Revisited),4.2,5,"List(Marta Martinez has a problem, She’s an engineering student gunning to win the campus robot wars and has a raging crush on Clarence Cunningham, star quarterback. He doesn’t even know she exists, and her best friend has been less than supportive., When major kitchen appliances suddenly grow to gargantuan size and start attacking the city, Marta sets out with her cat, her neighbor, and her engineering partner, into the skyways of St. Paul to save her best friend who just happens to be trapped at a Lowertown poetry reading with Clarence Cunningham., Marta has a plan: Save the quarterback Save the world)",List(),"List(Map(large -> https://m.media-amazon.com/images/I/51G3-m7RMFL._SX322_BO1,204,203,200_.jpg, variant -> MAIN))",List(),"List(Books, Science Fiction & Fantasy, Science Fiction)","Map(Dimensions -> 5.5 x 0.2 x 8.5 inches, Language -> English, Item Weight -> 5.4 ounces, ISBN 13 -> 978-1535110150, Publisher -> CreateSpace Independent Publishing Platform (July 6, 2016), Paperback -> 80 pages, ISBN 10 -> 1535110155)",1535110155,,12.99,Victorya Chase (Author),Books
"The RBG Workout 2020 Wall Calendar: (2020 Wall Calendar, 2020 Planners and Organizers for Women, Wall Calendars for 2020)",4.8,347,List(Exercise with Supreme Court Justice Ruth Bader Ginsburg using routines from her twice-weekly workouts and get into supreme shape!),List(),"List(Map(large -> https://m.media-amazon.com/images/I/51ijoFiXz9L._SX218_BO1,204,203,200_QL40_FMwebp_.jpg, variant -> MAIN))",List(),"List(Books, Calendars)","Map(Dimensions -> 12.1 x 0.25 x 12.05 inches, Language -> English, Item Weight -> 9.5 ounces, ISBN 13 -> 978-1452177106, Publisher -> Chronicle Books (July 23, 2019), Calendar -> 24 pages, ISBN 10 -> 1452177104)",1452177104,,14.99,Bryant Johnson (Author),Books
Fun Zone Presents! Pre School Math,5.0,4,"List(A children's math book for ages 3-6 , to learn the basic concepts of counting and mathematics , while keeping the learning fun and engaging. This book is very animated with cute animals, dinosaurs, fantasy creatures like mermaids and displays children in all races. Creative puzzles that encourage learning to count by multiples , numbers that encourages proper number writing. While keeping the learning fun, happy and delightful.)",List(),"List(Map(large -> https://m.media-amazon.com/images/I/418Bq-McpIS._SX384_BO1,204,203,200_.jpg, variant -> MAIN))",List(),"List(Books, Education & Teaching, Schools & Teaching)","Map(Dimensions -> 8.5 x 0.07 x 11 inches, Language -> English, Item Weight -> 4.3 ounces, ISBN 13 -> 979-8524069856, Publisher -> Independently published (June 27, 2021), Paperback -> 28 pages)",B0989VKSD6,,4.99,Kelvin White (Author),Books
From Anvil to Pulpit: The making of Robert Collyer,5.0,3,"List(Robert Collyer was a Unitarian minister with a formidable reputation as a preacher and public speaker, and a household name in nineteenth century Chicago and New York. Collyer was a fervent opponent of slavery and served as a Chaplain with the U.S. Sanitary Commission in the Civil War when he was deeply affected by the horrors of the battlefields and military camps. His great Unity Church, and his home, were destroyed in the Great Fire of Chicago and he played a leading part in the restoration of church and city. After twenty years of dedicated work and influence in Chicago, he moved to New York where he revived the fortunes of another once great church.But this charismatic preacher and national celebrity had humble origins. Born into poverty in Yorkshire, England, he started work in a textile mill at the age of eight. Later he became a blacksmith, and after the tragic death of his young wife, turned to the Methodist Church where he became a Local Preacher. After emigrating to America in 1850 he gained work in a hammer factory on the outskirts of Philadelphia, established his family and continued with his preaching. After nine years of factory work, without theological qualifications or experience as a pastor, Collyer was appointed as an Outreach Minister to the Unitarian Church in Chicago and went on to become one of the city's leading churchmen.Robert Collyer's life exemplifies the merits of self-help and dedication and the possibilities for advancement in 'the land of opportunity'. His story sheds light on many aspects of social history in both England and America in the nineteenth century – child labor, mass emigration, self-education, the role of the Church and the nature of celebrity in an era before mass communication. 
His remarkable life reminds us of universal truths about the supremacy of determination, persistence, and faith.)",List(),"List(Map(large -> https://m.media-amazon.com/images/I/41kiYJQReTS._SX331_BO1,204,203,200_.jpg, variant -> MAIN))",List(),"List(Books, Biographies & Memoirs, Arts & Literature)","Map(Dimensions -> 6 x 0.75 x 9 inches, Language -> English, Item Weight -> 15.5 ounces, ISBN 13 -> 978-1527293250, Publisher -> Michael F. Dixon (May 27, 2021), Paperback -> 330 pages, ISBN 10 -> 1527293254)",1527293254,,19.9,Mike Dixon (Author),Books


### Review: bronze to silver

Clean the reviews table: parse the raw JSON records, then filter and de-duplicate them.

In [0]:
# Build the parsed reviews DataFrame from the bronze table:
#   1. cast the raw `value` column to a string and drop rows where it is NULL,
#   2. parse that string as JSON against `review_schema`,
#   3. flatten the parsed struct into top-level columns,
#   4. derive `reviewTimestamp` from the string `timestamp` field, then drop
#      the original `timestamp` column.
# NOTE: only transformations are defined here; no action (count/show/write)
# is called in this cell.
df_reviews_transformed = (
    spark.table("`bigdata-and-bi`.bronze.reviews_raw_5")
    .withColumn("json_string", col("value").cast("string"))
    .where(col("json_string").isNotNull())
    .withColumn("parsed_json", from_json(col("json_string"), review_schema))
    .select("parsed_json.*")
    .withColumn("reviewTimestamp", to_timestamp(col("timestamp")))
    .drop("timestamp")
)

print("Done.")

Done.


In [0]:
# Report the shape of the parsed reviews DataFrame: row count, then column count.
print(
    f"Tổng số dòng: {df_reviews_transformed.count()}",
    f"Tổng số cột: {len(df_reviews_transformed.columns)}",
    sep="\n",
)

Tổng số dòng: 8787688
Tổng số cột: 10


In [0]:
# Distribution of reviews per rating value, sorted by rating (up to 50 rows shown).
(
    df_reviews_transformed
    .groupBy("rating")
    .count()
    .orderBy("rating")
    .show(50)
)

+------+-------+
|rating|  count|
+------+-------+
|   1.0| 244672|
|   2.0| 328591|
|   3.0| 770638|
|   4.0|1824057|
|   5.0|5619730|
+------+-------+



In [0]:
# Distribution of reviews per helpful_vote value, sorted ascending
# (only the first 50 distinct values are shown).
(
    df_reviews_transformed
    .groupBy("helpful_vote")
    .count()
    .orderBy("helpful_vote")
    .show(50)
)

+------------+-------+
|helpful_vote|  count|
+------------+-------+
|          -5|      1|
|           0|5493319|
|           1|1393481|
|           2| 560119|
|           3| 310919|
|           4| 197502|
|           5| 138623|
|           6| 101752|
|           7|  78009|
|           8|  61470|
|           9|  50227|
|          10|  41446|
|          11|  34766|
|          12|  29421|
|          13|  24990|
|          14|  21893|
|          15|  19069|
|          16|  16715|
|          17|  14925|
|          18|  13259|
|          19|  11797|
|          20|  10759|
|          21|   9607|
|          22|   8675|
|          23|   7766|
|          24|   7342|
|          25|   6622|
|          26|   6269|
|          27|   5592|
|          28|   5347|
|          29|   4729|
|          30|   4547|
|          31|   4233|
|          32|   3947|
|          33|   3555|
|          34|   3396|
|          35|   3121|
|          36|   3007|
|          37|   2860|
|          38|   2616|
|          

In [0]:
# Data-quality check on the parsed reviews: NULL count per column, plus the
# number of rows whose title is an empty string.
print("Kiểm tra số lượng NULL trong tất cả các cột của 'reviews':")

# One SQL aggregate per column: count(case when <col> is null ...) counts only
# the NULL rows. Column names are backtick-quoted so that names which are
# reserved words or contain special characters do not break the expression.
null_counts_expr = [
    f"count(case when `{col_name}` is null then 1 end) as `null_{col_name}`"
    for col_name in df_reviews_transformed.columns
]
display(df_reviews_transformed.selectExpr(*null_counts_expr))

# An empty string is not NULL, so the check above does not count it; empty
# titles are counted separately. The result is printed so the Spark job run
# by count() is not wasted on a value nobody sees.
empty_title_count = df_reviews_transformed.filter(col("title") == "").count()
print(f"Số dòng có title rỗng: {empty_title_count}")

Kiểm tra số lượng NULL trong tất cả các cột của 'reviews':


null_rating,null_title,null_text,null_images,null_asin,null_parent_asin,null_user_id,null_helpful_vote,null_verified_purchase,null_reviewTimestamp
0,0,0,0,0,0,0,0,0,0


In [0]:
# Clean the parsed reviews into the silver DataFrame:
#   - keep only rows that have an asin, a user_id and a rating,
#   - keep only ratings in the inclusive range [1.0, 5.0],
#   - keep one row per (asin, user_id, reviewTimestamp) combination.
# NOTE(review): the helpful_vote distribution printed earlier in this notebook
# contains a negative value (-5); it is not filtered here. Confirm whether
# negative helpful_vote rows should be dropped or corrected.
df_reviews_silver = (
    df_reviews_transformed
    .where(
        col("asin").isNotNull()
        & col("user_id").isNotNull()
        & col("rating").isNotNull()
        & col("rating").between(1.0, 5.0)
    )
    .dropDuplicates(["asin", "user_id", "reviewTimestamp"])
)

In [0]:
# Report the shape of the cleaned (silver) reviews DataFrame: row count, then column count.
print(
    f"Tổng số dòng: {df_reviews_silver.count()}",
    f"Tổng số cột: {len(df_reviews_silver.columns)}",
    sep="\n",
)

Tổng số dòng: 8719087
Tổng số cột: 10


In [0]:
# Persist the cleaned reviews as a silver table. The existing table contents
# are overwritten, and the "overwriteSchema" option is set to "true".
(
    df_reviews_silver.write
    .option("overwriteSchema", "true")
    .mode("overwrite")
    .saveAsTable("`bigdata-and-bi`.silver.reviews_clean_5_2")
)


In [0]:
# Preview: render 10 rows of the cleaned reviews DataFrame.
print("--- Hiển thị 10 dòng kết quả (reviews_clean): ---")
display(
    df_reviews_silver.limit(10)
)

--- Hiển thị 10 dòng kết quả (reviews_clean): ---


rating,title,text,images,asin,parent_asin,user_id,helpful_vote,verified_purchase,reviewTimestamp
4.0,Book,Great read,List(),1558174850,1558174850,AHGPJ3OST45LWUERQAXMJNXEHOLA,0,True,2016-03-26T20:53:33.000Z
5.0,Mystifying!,"My oh My, I could hardly put it down - fun, fun, fun and extremely interesting too! Mystifying, educational, kept me on the edge of my seat from cover to cover. I enjoyed this book to the fullest. Another great book from Dan Brown!",List(),0385504225,0385504225,AFUPDCITID4PSZ7EJOJB4V2AB5IA,0,True,2010-06-20T17:20:11.000Z
5.0,Nutrition,"Gary Nulls is an excellent nutrition guru. I have several of his works, and I just recently purchase several of his DVDs. Excellent information and very helpful in solving many health related problems--you are what you eat!",List(),0451210506,0451210506,AEDAZOWHA4MMSQCIKNAUQVU5VZPQ,0,False,2010-08-09T22:54:34.000Z
5.0,Outstanding introduction to freestyle wrestling,"Most guides to combat sports are hampered by poor quality photography. This short, good-value book is an exception. The large, full-color photos show clearly how to execute moves and the text contains lots of personal testimonies from experienced wrestlers past and present.",List(),1402701071,1402701071,AHNDQE4EVGNLLGDUFW6NTDBEQCYA,3,False,2004-12-09T17:17:30.000Z
4.0,"Very interesting and innovative, but perhaps a bit overrated","I'm sure I'm courting fanboy wrath here, but I finally got around to reading ""Watchmen"" and I gotta tell ya... it ain't that great. Oh, believe me, it's helluva a comic book (""graphic novel"" is a pseduo-intellectualized marketing term). And as a work of imaginative fiction and innovative storytelling, it is impressive. I quite enjoyed reading it. I found it very interesting at several points, and even intellectually challenging (Alan Moore does love big words and literary references), but I just can't bring myself to think of it as some towering literary accomplishment for the ages. I suppose I'm just reacting to some of the slavering hyperbole heaped on in it by the geek squad (and I'm a proud member of that nerdy fraternity, mind you). Some times, things can be victims of their own hype. I think that's the case here. So go ahead and read ""Watchmen"" for yourself, if you're at all interested. It's certainly worth your time. If you approach it with a open mind and take it for what it is (a really excellent comic book), you'll be rewarded with a compelling text-and-visual story rich enough to appreciate on multiple levels. Given how much dreadful junk is out there in every medium, ""Watchmen"" really does stand pretty tall. I definitely tip my hat to it. I'm not going to genuflect at its altar though, fanboy piety be damned. [...]",List(),0930289234,0930289234,AFNO5MWQJ4R67RPXFCBKQLMMUV2A,3,True,2009-07-16T17:34:56.000Z
5.0,Expertly written,One of the best books ever written about air combat. You feel like you're one of the Jolly Rogers yourself !!,List(),0517570750,0517570750,AHFBBXF42H32I7EJDYKRG7AKMINQ,2,False,2001-11-01T11:42:30.000Z
3.0,Three Stars,Good premise but not great execution.,List(),039953671X,039953671X,AEZVDLZGCR3IHRXLZLZEXZ2BRVWA,0,True,2017-05-15T16:38:13.000Z
5.0,Worth reading,"Better to read than most ""self help"" books. It is not a ""how-to"", but it teaches you a lot about yourself. Put away your prejudices before you start reading.",List(),0969675534,0969675534,AGRURHMGC5OCDUI2JVHNFHLBHBSQ,0,False,2019-08-08T04:53:11.350Z
5.0,Great series.,"My hubby reads this type of book series, he really likes this author.",List(),1647349826,1647349826,AEDDVNHVPTTQ554AFPYDI5EJ34AQ,0,True,2022-05-17T18:17:03.274Z
4.0,Great book,Very informative.,List(),1482596148,1482596148,AFNAUKAGN4VGIYEB4GM7GAFDCHFQ,0,True,2018-11-22T01:26:29.776Z
