# Создание витрин в ClickHouse

### Загрузка DDL в ClickHouse

In [1]:
!pip install clickhouse-connect



In [2]:
import os

import clickhouse_connect

# Connection settings can be overridden through environment variables; the
# fallbacks are the values this notebook has always used.
# NOTE(review): the fallback password is still committed to the notebook -
# rotate it and drop the default once the environment variables are in place.
client = clickhouse_connect.get_client(
    host=os.environ.get('CLICKHOUSE_HOST', 'demo13b.ddnsfree.com'),
    port=int(os.environ.get('CLICKHOUSE_PORT', '30001')),
    username=os.environ.get('CLICKHOUSE_USER', 'tim'),
    password=os.environ.get('CLICKHOUSE_PASSWORD', 'DogeCoin'),
)

# The DDL file holds several statements separated by ';'.
# Limitation: this is a plain text split, so a ';' inside a string literal or
# a comment of the DDL file would cut a statement in two.
with open('clickhouse_ddl.sql', 'r') as f:
    commands = [chunk.strip() for chunk in f.read().split(';')]

# Execute every non-empty statement. The former `commands[:-1]` slice silently
# dropped the last statement whenever the file did not end with ';' and sent
# empty statements for a stray ';;'.
for command in commands:
    if command:
        client.command(command)

### Загрузка таблиц из PostgreSQL

In [3]:
from pyspark.sql import SparkSession

In [4]:
import os

# JDBC driver jars (PostgreSQL and ClickHouse) passed to Spark via --jars.
# The variable is set before the SparkSession is created in the next cell.
jdbc_jars = ','.join([
    '/home/jovyan/work/jars/postgresql-42.7.3.jar',
    '/home/jovyan/work/jars/clickhouse-jdbc-0.4.6-all.jar',
])
os.environ['PYSPARK_SUBMIT_ARGS'] = f'--jars {jdbc_jars} pyspark-shell'

In [5]:
# Create (or reuse) the Spark session shared by all cells below.
spark = (
    SparkSession.builder
    .appName("DataLoader")
    .getOrCreate()
)

In [6]:
# JDBC connection settings for the PostgreSQL source database.
# Each value can be overridden through an environment variable; the fallbacks
# are the values this notebook has always used (`os` is imported in the cell
# that sets PYSPARK_SUBMIT_ARGS).
# NOTE(review): the fallback password is still committed to the notebook -
# rotate it and drop the default once the environment variables are in place.
postgres_url = os.environ.get(
    "POSTGRES_JDBC_URL",
    "jdbc:postgresql://demo13b.ddnsfree.com:30000/big_data_snowflake",
)
postgres_properties = {
    "user": os.environ.get("POSTGRES_USER", "admin"),
    "password": os.environ.get("POSTGRES_PASSWORD", "DogeCoin"),
    "driver": "org.postgresql.Driver",
}

In [7]:
def read_postgres_table(table_name):
    """Read one table of the PostgreSQL source database as a Spark DataFrame."""
    return spark.read.jdbc(
        url=postgres_url,
        table=table_name,
        properties=postgres_properties,
    )


# Source tables used to build the marts below.
customers_df = read_postgres_table('customers')
sellers_df = read_postgres_table('sellers')
stores_df = read_postgres_table('stores')
suppliers_df = read_postgres_table('suppliers')
products_df = read_postgres_table('products')
sales_df = read_postgres_table('sales')

### Витрина продаж по продуктам

In [8]:
from pyspark.sql.functions import sum, avg, count, desc, round, year, month, col

In [9]:
# Aliased so the builtin `max` is not shadowed.
from pyspark.sql.functions import max as spark_max

# Product sales mart: sales joined to their product and aggregated per product.
product_mart = sales_df.join(
    products_df,
    (sales_df.product_id == products_df.id)
).groupBy(
    products_df.id.alias('product_id'),
    products_df.name.alias('product_name'),
    products_df.brand,
    products_df.description,
    products_df.price,
    products_df.category.alias('product_category'),
).agg(
    sum(sales_df.quantity).alias('total_quantity'),
    round(sum(sales_df.total_price), 2).alias('total_revenue'),
    count('*').alias('total_sales'),
    round(avg(products_df.rating), 2).alias('avg_rating'),
    # `reviews` comes from the products table, so after the join it repeats on
    # every sale row of the product. Summing it multiplied the review count by
    # the number of sales; take the per-product value once instead.
    # The cast keeps the 64-bit integer type that sum() produced for an
    # integer column - assumes `reviews` is an integer column, TODO confirm.
    spark_max(products_df.reviews).cast('long').alias('total_reviews')
)

product_mart.show()

+----------+------------+-------------+--------------------+-----+----------------+--------------+-------------+-----------+----------+-------------+
|product_id|product_name|        brand|         description|price|product_category|total_quantity|total_revenue|total_sales|avg_rating|total_reviews|
+----------+------------+-------------+--------------------+-----+----------------+--------------+-------------+-----------+----------+-------------+
|      5850|     Cat Toy|    Bubblebox|Donec diam neque,...|62.03|            Cage|             7|       436.43|          1|       1.1|          286|
|       588|    Dog Food|     Feedspan|Praesent blandit....|95.97|            Food|             9|       398.55|          1|       4.1|           60|
|      5284|    Dog Food|      Camimbo|Morbi non lectus....| 5.71|            Food|             9|       281.92|          1|       2.8|          473|
|      8583|     Cat Toy|  Twitterwire|Duis bibendum. Mo...|30.06|            Food|             5|  

In [10]:
# Ten products with the largest number of units sold.
top_10 = product_mart.orderBy('total_quantity', ascending=False).limit(10)

top_10.show()

+----------+------------+-----------+--------------------+-----+----------------+--------------+-------------+-----------+----------+-------------+
|product_id|product_name|      brand|         description|price|product_category|total_quantity|total_revenue|total_sales|avg_rating|total_reviews|
+----------+------------+-----------+--------------------+-----+----------------+--------------+-------------+-----------+----------+-------------+
|      4987|   Bird Cage|      Skyvu|Quisque porta vol...|67.68|             Toy|            10|        74.65|          1|       1.5|          112|
|      6690|     Cat Toy|      Quimm|Duis aliquam conv...|66.25|             Toy|            10|       453.14|          1|       3.0|          220|
|      1685|     Cat Toy|    Gabtype|Cras non velit ne...|42.86|            Cage|            10|       409.68|          1|       4.1|          796|
|      1264|   Bird Cage|Jabberstorm|Duis bibendum. Mo...|87.83|            Food|            10|        22.66|  

In [11]:
# Revenue of the product mart rolled up to product category.
category_revenue = (
    product_mart
    .groupBy('product_category')
    .agg(round(sum(col('total_revenue')), 2).alias('total_revenue'))
)

category_revenue.show()

+----------------+-------------+
|product_category|total_revenue|
+----------------+-------------+
|            Cage|    831117.94|
|            Food|    830632.55|
|             Toy|    868101.63|
+----------------+-------------+



In [12]:
# JDBC connection settings for the ClickHouse target database.
# Each value can be overridden through an environment variable; the fallbacks
# are the values this notebook has always used (`os` is imported in the cell
# that sets PYSPARK_SUBMIT_ARGS).
# NOTE(review): the fallback password is still committed to the notebook -
# rotate it and drop the default once the environment variables are in place.
clickhouse_url = os.environ.get(
    "CLICKHOUSE_JDBC_URL",
    "jdbc:clickhouse://demo13b.ddnsfree.com:30001",
)
clickhouse_properties = {
    "user": os.environ.get("CLICKHOUSE_USER", "tim"),
    "password": os.environ.get("CLICKHOUSE_PASSWORD", "DogeCoin"),
    "driver": "com.clickhouse.jdbc.ClickHouseDriver",
}

In [13]:
# Append the product mart to the ClickHouse table `product_mart`.
# NOTE(review): append mode adds the rows again on every run - re-executing
# the notebook may duplicate data unless the DDL step recreates the table.
product_mart.write.mode('append').jdbc(
    url=clickhouse_url,
    table='product_mart',
    properties=clickhouse_properties,
)

### Витрина продаж по клиентам

In [14]:
# Customer sales mart: sales joined to their customer and aggregated per
# customer (id, name, email, country).
customer_keys = [
    customers_df['id'].alias('customer_id'),
    customers_df['first_name'],
    customers_df['last_name'],
    customers_df['email'],
    customers_df['country'],
]
customer_metrics = [
    round(sum(sales_df['total_price']), 2).alias('total_spent'),
    round(avg(sales_df['total_price']), 2).alias('avg_receipt'),
    count('*').alias('total_purchases'),
]

customers_mart = (
    sales_df
    .join(customers_df, on=sales_df['customer_id'] == customers_df['id'], how='inner')
    .groupBy(*customer_keys)
    .agg(*customer_metrics)
)

customers_mart.show()

+-----------+----------+------------+--------------------+--------------------+-----------+-----------+---------------+
|customer_id|first_name|   last_name|               email|             country|total_spent|avg_receipt|total_purchases|
+-----------+----------+------------+--------------------+--------------------+-----------+-----------+---------------+
|       2941|    Anissa|      Melson|lvesty6h@parallel...|            Bulgaria|     154.59|     154.59|              1|
|       6879|  Gabriela|    Kerrigan|    gdecv@dion.ne.jp|               China|      277.6|      277.6|              1|
|        487|    Moises|         Mea|cmelleygt@redcros...|              France|     469.85|     469.85|              1|
|       4545|     Gabie|     Casassa|ssteerrm@amazon.c...|              Poland|     375.56|     375.56|              1|
|        635|    Muriel|      Rubrow|jpoulden2c@discuz...|           Indonesia|     173.39|     173.39|              1|
|       9192|      Dora|      Muslim|bfi

In [15]:
# Ten customers with the highest total spend.
top_10_customers = customers_mart.orderBy('total_spent', ascending=False).limit(10)

top_10_customers.show()

+-----------+----------+----------+--------------------+---------+-----------+-----------+---------------+
|customer_id|first_name| last_name|               email|  country|total_spent|avg_receipt|total_purchases|
+-----------+----------+----------+--------------------+---------+-----------+-----------+---------------+
|       6885|       Gus| Hartshorn| bfeasby57@youku.com|  Albania|     499.85|     499.85|              1|
|       1487|     Hayes|    McKain|sstappardbp@busin...| Portugal|      499.8|      499.8|              1|
|       8002|     Dawna|     Impey|    rivattspm@un.org|Indonesia|     499.76|     499.76|              1|
|       4452|       Ava|     Lomas|dsorea0@geocities...|    China|     499.76|     499.76|              1|
|       1772|   Lavinia| Horsburgh|previllh3@tinyurl...|   Poland|     499.73|     499.73|              1|
|       5945|      Dame|Auchinleck|jthurnhamqe@sourc...|Indonesia|     499.71|     499.71|              1|
|       1556|  Isahella|    Colley|bs

In [16]:
# Number of customer-mart rows per country, largest first.
customer_country_distribution = (
    customers_mart
    .groupBy('country')
    .count()
    .withColumnRenamed('count', 'customers')
    .orderBy('customers', ascending=False)
)

customer_country_distribution.show()

+--------------+---------+
|       country|customers|
+--------------+---------+
|         China|     1738|
|     Indonesia|     1174|
|        Russia|      628|
|   Philippines|      555|
|        Brazil|      385|
|      Portugal|      336|
|        Poland|      332|
|        France|      322|
|        Sweden|      264|
| United States|      211|
|         Japan|      201|
|       Ukraine|      155|
|      Colombia|      152|
|Czech Republic|      140|
|        Canada|      137|
|      Thailand|      126|
|          Peru|      123|
|        Greece|      116|
|     Argentina|      113|
|       Nigeria|      103|
+--------------+---------+
only showing top 20 rows



In [17]:
# Append the customer mart to the ClickHouse table `customers_mart`.
# NOTE(review): append mode adds the rows again on every run - re-executing
# the notebook may duplicate data unless the DDL step recreates the table.
customers_mart.write.mode('append').jdbc(
    url=clickhouse_url,
    table='customers_mart',
    properties=clickhouse_properties,
)

### Витрина продаж по времени

In [18]:
# Time sales mart: sales aggregated per calendar year and month of `date`,
# most recent period first.
time_mart = (
    sales_df
    .select(
        'id',
        'quantity',
        'total_price',
        year('date').alias('year'),
        month('date').alias('month'),
    )
    .groupBy('year', 'month')
    .agg(
        count('id').alias('total_sales'),
        sum('quantity').alias('total_quantity'),
        round(avg('quantity'), 2).alias('avg_sale_size'),
        round(sum('total_price'), 2).alias('total_revenue'),
    )
    .orderBy(col('year').desc(), col('month').desc())
)

time_mart.show()

+----+-----+-----------+--------------+-------------+-------------+
|year|month|total_sales|total_quantity|avg_sale_size|total_revenue|
+----+-----+-----------+--------------+-------------+-------------+
|2021|   12|        770|          4335|         5.63|    191368.86|
|2021|   11|        801|          4297|         5.36|    200154.69|
|2021|   10|        892|          4976|         5.58|    228743.32|
|2021|    9|        839|          4507|         5.37|    210623.43|
|2021|    8|        897|          4818|         5.37|    221275.78|
|2021|    7|        858|          4750|         5.54|    220496.51|
|2021|    6|        822|          4438|          5.4|     215042.8|
|2021|    5|        828|          4451|         5.38|    211764.86|
|2021|    4|        837|          4564|         5.45|    206592.82|
|2021|    3|        843|          4561|         5.41|     207282.2|
|2021|    2|        739|          4070|         5.51|    192348.31|
|2021|    1|        874|          4856|         

In [19]:
# Append the time mart to the ClickHouse table `time_mart`.
# NOTE(review): append mode adds the rows again on every run - re-executing
# the notebook may duplicate data unless the DDL step recreates the table.
time_mart.write.mode('append').jdbc(
    url=clickhouse_url,
    table='time_mart',
    properties=clickhouse_properties,
)

### Витрина продаж по магазинам

In [20]:
# Store sales mart: sales joined to their store and aggregated per store.
store_sales = sales_df.join(stores_df, sales_df['store_id'] == stores_df['id'])

stores_mart = (
    store_sales
    .groupBy(
        sales_df['store_id'],
        stores_df['name'].alias('store_name'),
        stores_df['location'],
        stores_df['city'],
        stores_df['state'],
        stores_df['country'],
        stores_df['phone'],
        stores_df['email'],
    )
    .agg(
        sum(sales_df['quantity']).alias('total_quantity'),
        count('*').alias('total_sales'),
        round(sum(sales_df['total_price']), 2).alias('total_revenue'),
        round(avg(sales_df['total_price']), 2).alias('avg_receipt'),
    )
)

stores_mart.show()

+--------+------------+------------+------------------+-----+--------------------+------------+--------------------+--------------+-----------+-------------+-----------+
|store_id|  store_name|    location|              city|state|             country|       phone|               email|total_quantity|total_sales|total_revenue|avg_receipt|
+--------+------------+------------+------------------+-----+--------------------+------------+--------------------+--------------+-----------+-------------+-----------+
|      26|   Babbleset|  14th Floor|          Cibitung|     |           Indonesia|276-186-3743|    kpidgen62@hp.com|            10|          1|       106.84|     106.84|
|      29|      Skaboo|     Apt 570|           Barueri|     |           Indonesia|169-243-5134|kduckhouseje@tutt...|             1|          1|        45.83|      45.83|
|     474|       Abata|PO Box 12980|    Timiryazevskiy|     |              Greece|707-559-3117|dmackellar7e@harv...|             6|          1|       

In [21]:
# Five stores with the highest revenue.
top_5_stores = stores_mart.orderBy('total_revenue', ascending=False).limit(5)

top_5_stores.show()

+--------+-----------+----------+---------+-----+------------+------------+--------------------+--------------+-----------+-------------+-----------+
|store_id| store_name|  location|     city|state|     country|       phone|               email|total_quantity|total_sales|total_revenue|avg_receipt|
+--------+-----------+----------+---------+-----+------------+------------+--------------------+--------------+-----------+-------------+-----------+
|    6008|       DabZ|13th Floor|   Grekan|     |South Africa|206-444-7223|bfeasby57@archive...|             7|          1|       499.85|     499.85|
|    3254|Thoughtblab|  Apt 1200|    Fonte|   13|      Poland|797-399-4968|sstappardbp@amebl...|             9|          1|        499.8|      499.8|
|    4343|   Edgeblab|17th Floor|    Pesek|     |   Indonesia|986-221-7024|rivattspm@netscap...|             8|          1|       499.76|     499.76|
|    2129|     Camido|  Apt 1720|Longzhong|     |      Sweden|591-803-5126|dsorea0@people.co...|    

In [22]:
# Store sales rolled up to the store's country, busiest countries first.
country_store_distribution = stores_mart.groupBy(
    'country'
).agg(
    sum('total_quantity').alias('total_quantity'),
    sum('total_sales').alias('total_sales'),
    round(sum('total_revenue'), 2).alias('total_revenue'),
    # Average receipt = revenue / number of sales. Averaging the per-store
    # `avg_receipt` values gave an unweighted mean of means, which is wrong as
    # soon as stores have different numbers of sales.
    round(sum('total_revenue') / sum('total_sales'), 2).alias('avg_receipt')
).orderBy(
    desc('total_sales'),
    desc('total_quantity')
)

country_store_distribution.show()

+--------------+--------------+-----------+-------------+-----------+
|       country|total_quantity|total_sales|total_revenue|avg_receipt|
+--------------+--------------+-----------+-------------+-----------+
|         China|         10331|       1885|    485709.57|     257.67|
|     Indonesia|          6110|       1100|    279350.16|     253.95|
|        Russia|          3002|        550|    138155.41|     251.19|
|   Philippines|          2509|        492|    125385.36|     254.85|
|        Brazil|          2130|        384|     99733.67|     259.72|
|      Portugal|          1829|        336|     87104.28|     259.24|
|        Poland|          1824|        332|     81313.13|     244.92|
|        France|          1705|        323|     82391.57|     255.08|
|        Sweden|          1370|        242|     64148.78|     265.08|
| United States|          1206|        209|     52713.72|     252.22|
|         Japan|           974|        172|     42425.52|     246.66|
|Czech Republic|    

In [23]:
# Store sales rolled up to the store's city, busiest cities first.
city_store_distribution = stores_mart.groupBy(
    'city'
).agg(
    sum('total_quantity').alias('total_quantity'),
    sum('total_sales').alias('total_sales'),
    round(sum('total_revenue'), 2).alias('total_revenue'),
    # Average receipt = revenue / number of sales. Averaging the per-store
    # `avg_receipt` values gave an unweighted mean of means, which is wrong as
    # soon as stores have different numbers of sales.
    round(sum('total_revenue') / sum('total_sales'), 2).alias('avg_receipt')
).orderBy(
    desc('total_sales'),
    desc('total_quantity')
)

city_store_distribution.show()

+-------------+--------------+-----------+-------------+-----------+
|         city|total_quantity|total_sales|total_revenue|avg_receipt|
+-------------+--------------+-----------+-------------+-----------+
|    Stockholm|           169|         25|       6881.7|     275.27|
|San Francisco|            96|         16|      4576.18|     286.01|
|  San Antonio|            81|         15|      4080.05|      272.0|
|         Oslo|            71|         14|      3699.71|     264.27|
|    Marseille|            57|         11|      2036.65|     185.15|
|         Lyon|            58|          9|      2187.88|      243.1|
|   San Isidro|            52|          9|      2190.05|     243.34|
|   Santa Cruz|            48|          9|      2271.79|     252.42|
|     Västerås|            57|          8|      2687.33|     335.92|
|    København|            46|          8|      2171.69|     271.46|
|       Nantes|            40|          8|      1957.13|     244.64|
|   Buenavista|            37|    

In [24]:
# Append the store mart to the ClickHouse table `stores_mart`.
# NOTE(review): append mode adds the rows again on every run - re-executing
# the notebook may duplicate data unless the DDL step recreates the table.
stores_mart.write.mode('append').jdbc(
    url=clickhouse_url,
    table='stores_mart',
    properties=clickhouse_properties,
)

### Витрина продаж по поставщикам

In [25]:
# Supplier sales mart: sales joined to their supplier and aggregated per
# supplier.
suppliers_mart = (
    sales_df
    .join(suppliers_df, sales_df['supplier_id'] == suppliers_df['id'])
    # NOTE(review): no products_df column is used in the grouping or the
    # aggregates below, so this inner join only drops sales without a matching
    # product - confirm whether it is intentional.
    .join(products_df, sales_df['product_id'] == products_df['id'])
    .groupBy(
        suppliers_df['id'].alias('supplier_id'),
        suppliers_df['name'],
        suppliers_df['contact'],
        suppliers_df['email'],
        suppliers_df['phone'],
        suppliers_df['city'],
        suppliers_df['country'],
    )
    .agg(
        sum(sales_df['quantity']).alias('total_quantity'),
        round(sum(sales_df['total_price']), 2).alias('total_revenue'),
        count('*').alias('total_sales'),
    )
)

suppliers_mart.show()

+-----------+-------------+-------------------+--------------------+------------+-------------+-----------+--------------+-------------+-----------+
|supplier_id|         name|            contact|               email|       phone|         city|    country|total_quantity|total_revenue|total_sales|
+-----------+-------------+-------------------+--------------------+------------+-------------+-----------+--------------+-------------+-----------+
|       6297|     Livetube|    Alon Bartolozzi|abartolozzi99@stu...|442-769-2128|       Isulan|    Ukraine|             3|       415.54|          1|
|       7819|     Realcube|           Bax Elfe|belfeqz@columbia.edu|801-832-3129|        Rrapë| Azerbaijan|             8|       251.86|          1|
|       1211|         Jayo|        Jeddy Keyme|  jkeymej1@jimdo.com|468-420-7891|Fastovetskaya|      China|            10|       352.87|          1|
|       4277|       Wikivu|   Gwenore Ivchenko|givchenkoi@source...|210-108-7872|Novaya Lyalya|Philippines

In [26]:
# Five suppliers with the highest revenue.
top_5_suppliers = suppliers_mart.orderBy('total_revenue', ascending=False).limit(5)

top_5_suppliers.show()

+-----------+----------+---------------+--------------------+------------+----------+---------+--------------+-------------+-----------+
|supplier_id|      name|        contact|               email|       phone|      city|  country|total_quantity|total_revenue|total_sales|
+-----------+----------+---------------+--------------------+------------+----------+---------+--------------+-------------+-----------+
|       6275|Brainverse|Barbabas Feasby|    bfeasby57@ed.gov|985-466-9726|    Tudela|  Ireland|             7|       499.85|          1|
|       1561|     Jamia|   Sax Stappard|sstappardbp@webno...|502-283-5604|     Luleå|   Russia|             9|        499.8|          1|
|        714|     Eabox|      Dell Sore|     dsorea0@soup.io|196-123-7189|   Sloboda| Portugal|             2|       499.76|          1|
|        800|   Demimbu|  Reggis Ivatts|   rivattspm@nps.gov|110-740-4595|   Begejci|    China|             8|       499.76|          1|
|       5566|Browsezoom| Padgett Revill|p

In [27]:
# Revenue per unit sold for each supplier (total_revenue / total_quantity),
# shown next to the supplier's descriptive columns.
avg_supplier_price = suppliers_mart.selectExpr(
    'supplier_id',
    'name',
    'contact',
    'email',
    'phone',
    'city',
    'country',
    'round(total_revenue / total_quantity, 2) AS avg_price',
)

avg_supplier_price.show()

+-----------+-------------+-------------------+--------------------+------------+-------------+-----------+---------+
|supplier_id|         name|            contact|               email|       phone|         city|    country|avg_price|
+-----------+-------------+-------------------+--------------------+------------+-------------+-----------+---------+
|       6297|     Livetube|    Alon Bartolozzi|abartolozzi99@stu...|442-769-2128|       Isulan|    Ukraine|   138.51|
|       7819|     Realcube|           Bax Elfe|belfeqz@columbia.edu|801-832-3129|        Rrapë| Azerbaijan|    31.48|
|       1211|         Jayo|        Jeddy Keyme|  jkeymej1@jimdo.com|468-420-7891|Fastovetskaya|      China|    35.29|
|       4277|       Wikivu|   Gwenore Ivchenko|givchenkoi@source...|210-108-7872|Novaya Lyalya|Philippines|    30.82|
|       4806|     Fanoodle|     Charlie Attard|cattardoq@science...|122-819-5250|     Lahishyn|  Indonesia|     5.98|
|       2983|       Skinix|       Kay Caselick|kcaselick

In [28]:
# Number of sales per supplier country, largest first.
supplier_country_distribution = (
    suppliers_mart
    .groupBy('country')
    .agg(sum(col('total_sales')).alias('total_sales'))
    .orderBy('total_sales', ascending=False)
)

supplier_country_distribution.show()

+--------------+-----------+
|       country|total_sales|
+--------------+-----------+
|         China|       1921|
|     Indonesia|       1079|
|        Russia|        582|
|   Philippines|        536|
|        Brazil|        376|
|      Portugal|        343|
|        Poland|        321|
|        France|        308|
|        Sweden|        217|
| United States|        213|
|Czech Republic|        176|
|         Japan|        171|
|       Ukraine|        169|
|      Thailand|        163|
|      Colombia|        153|
|          Peru|        152|
|     Argentina|        131|
|        Greece|        124|
|        Canada|        113|
|        Mexico|         99|
+--------------+-----------+
only showing top 20 rows



In [29]:
# Append the supplier mart to the ClickHouse table `suppliers_mart`.
# NOTE(review): append mode adds the rows again on every run - re-executing
# the notebook may duplicate data unless the DDL step recreates the table.
suppliers_mart.write.mode('append').jdbc(
    url=clickhouse_url,
    table='suppliers_mart',
    properties=clickhouse_properties,
)

### Витрина качества продукции

In [30]:
# Product quality mart: per-product sales totals next to the product's rating
# and review count (both are grouping keys, so they are carried through as-is).
product_sales = sales_df.join(products_df, sales_df['product_id'] == products_df['id'])

quality_mart = (
    product_sales
    .groupBy(
        products_df['id'].alias('product_id'),
        products_df['name'].alias('product_name'),
        products_df['brand'],
        products_df['description'],
        products_df['price'],
        products_df['category'].alias('product_category'),
        products_df['rating'],
        products_df['reviews'],
    )
    .agg(
        sum(sales_df['quantity']).alias('total_quantity'),
        round(sum(sales_df['total_price']), 2).alias('total_revenue'),
        count('*').alias('total_sales'),
    )
)

quality_mart.show()

+----------+------------+----------+--------------------+-----+----------------+------+-------+--------------+-------------+-----------+
|product_id|product_name|     brand|         description|price|product_category|rating|reviews|total_quantity|total_revenue|total_sales|
+----------+------------+----------+--------------------+-----+----------------+------+-------+--------------+-------------+-----------+
|      9156|   Bird Cage|    Meezzy|Sed ante. Vivamus...|34.96|            Food|   1.5|    494|             8|       414.39|          1|
|      5343|   Bird Cage|Realbridge|Aenean lectus. Pe...|81.01|            Food|   2.2|    611|            10|       422.34|          1|
|      2069|     Cat Toy|  Topdrive|Curabitur in libe...|75.15|             Toy|   1.0|    506|             5|        197.6|          1|
|      4970|     Cat Toy|   Jaxspan|In hac habitasse ...|95.87|            Cage|   2.1|    877|             1|        72.61|          1|
|      2404|   Bird Cage|   Demimbu|Morbi

In [31]:
# Five products with the highest rating.
greatest_rating_products = quality_mart.orderBy('rating', ascending=False).limit(5)

greatest_rating_products.show()

+----------+------------+--------+--------------------+-----+----------------+------+-------+--------------+-------------+-----------+
|product_id|product_name|   brand|         description|price|product_category|rating|reviews|total_quantity|total_revenue|total_sales|
+----------+------------+--------+--------------------+-----+----------------+------+-------+--------------+-------------+-----------+
|      7675|   Bird Cage|   LiveZ|Curabitur at ipsu...| 96.7|            Cage|   5.0|    953|             4|       258.34|          1|
|      8673|    Dog Food|Zoomcast|Vestibulum ac est...|18.93|            Food|   5.0|    911|             9|       270.86|          1|
|      7642|   Bird Cage|    Layo|Cras non velit ne...|65.52|             Toy|   5.0|    559|             3|        14.27|          1|
|      3728|    Dog Food|   Zoovu|Maecenas ut massa...|23.26|             Toy|   5.0|    966|             1|       410.77|          1|
|       413|   Bird Cage|Centimia|Duis bibendum, fe...|

In [32]:
# Five products with the lowest rating.
lowest_rating_products = quality_mart.orderBy('rating', ascending=True).limit(5)

lowest_rating_products.show()

+----------+------------+-------+--------------------+-----+----------------+------+-------+--------------+-------------+-----------+
|product_id|product_name|  brand|         description|price|product_category|rating|reviews|total_quantity|total_revenue|total_sales|
+----------+------------+-------+--------------------+-----+----------------+------+-------+--------------+-------------+-----------+
|      4908|   Bird Cage|Jetwire|In quis justo. Ma...|30.13|             Toy|   1.0|    696|             6|        318.7|          1|
|      7879|     Cat Toy| Kaymbo|Mauris enim leo, ...|91.38|            Food|   1.0|    476|             3|       415.54|          1|
|      9472|    Dog Food|  Jatri|Proin leo odio, p...|36.52|            Food|   1.0|    738|             3|        373.2|          1|
|      2261|    Dog Food|Wikibox|Duis aliquam conv...|90.35|            Cage|   1.0|    639|             1|        469.7|          1|
|      9583|    Dog Food|Rhycero|In hac habitasse ...|58.63|  

In [33]:
# Number of sales per rating value, highest rating first.
# This is a distribution of sales over ratings, not a correlation coefficient.
sales_quality_corellation = (
    quality_mart
    .groupBy('rating')
    .agg(sum(col('total_sales')).alias('sales'))
    .orderBy('rating', ascending=False)
)

sales_quality_corellation.show()

+------+-----+
|rating|sales|
+------+-----+
|   5.0|  135|
|   4.9|  257|
|   4.8|  268|
|   4.7|  274|
|   4.6|  242|
|   4.5|  291|
|   4.4|  241|
|   4.3|  235|
|   4.2|  225|
|   4.1|  250|
|   4.0|  212|
|   3.9|  260|
|   3.8|  250|
|   3.7|  286|
|   3.6|  229|
|   3.5|  233|
|   3.4|  255|
|   3.3|  268|
|   3.2|  221|
|   3.1|  241|
+------+-----+
only showing top 20 rows



In [34]:
# Ten products with the largest number of reviews.
top_reviewed_products = quality_mart.orderBy('reviews', ascending=False).limit(10)

top_reviewed_products.show()

+----------+------------+-----------+--------------------+-----+----------------+------+-------+--------------+-------------+-----------+
|product_id|product_name|      brand|         description|price|product_category|rating|reviews|total_quantity|total_revenue|total_sales|
+----------+------------+-----------+--------------------+-----+----------------+------+-------+--------------+-------------+-----------+
|      1524|   Bird Cage|   Edgewire|Lorem ipsum dolor...|38.43|             Toy|   1.7|   1000|             2|        43.54|          1|
|       884|    Dog Food|   Gigaclub|Maecenas leo odio...| 44.9|            Food|   1.7|   1000|             7|       311.36|          1|
|      7705|    Dog Food|     Skinix|Nullam sit amet t...|31.26|            Food|   2.9|   1000|            10|       398.87|          1|
|      8680|     Cat Toy|    Edgeify|Praesent blandit....|34.69|             Toy|   1.6|   1000|             4|       162.89|          1|
|       736|   Bird Cage|     Rhyl

In [35]:
# Append the quality mart to the ClickHouse table `quality_mart`.
# NOTE(review): append mode adds the rows again on every run - re-executing
# the notebook may duplicate data unless the DDL step recreates the table.
quality_mart.write.mode('append').jdbc(
    url=clickhouse_url,
    table='quality_mart',
    properties=clickhouse_properties,
)