In [1]:
from pyspark.sql import SparkSession
import getpass

# Login name of the current OS user; it selects the per-user warehouse path below.
username = getpass.getuser()

# Per-user warehouse directory handed to Spark SQL.
warehouse_dir = f"/user/{username}/warehouse"

# Build (or reuse) a Hive-enabled session that runs on YARN.
# NOTE(review): port '0' presumably lets Spark pick any free UI port — confirm.
spark = (
    SparkSession.builder
    .config('spark.ui.port', '0')
    .config("spark.sql.warehouse.dir", warehouse_dir)
    .enableHiveSupport()
    .master('yarn')
    .getOrCreate()
)

In [2]:
# Create the personal database; `if not exists` keeps this cell safe to re-run.
create_db_stmt = "create database if not exists itv014945_db"
spark.sql(create_db_stmt)

In [3]:
# Peek at the first five databases visible to this session.
databases_df = spark.sql("show databases")
databases_df.show(5)

+--------------------+
|           namespace|
+--------------------+
|0000000000000_msdian|
|0000000000000_nav...|
|0000000009874_retail|
|          00000_2_db|
|       00000assg5_db|
+--------------------+
only showing top 5 rows



### To find the database you created (exact name match)

In [4]:
# Keep only the row whose namespace is exactly our database name.
all_databases = spark.sql("show databases")
my_database = all_databases.filter("namespace = 'itv014945_db'")
my_database.show()

+------------+
|   namespace|
+------------+
|itv014945_db|
+------------+



### To find all databases whose names start with your prefix (pattern match)

In [5]:
# List every database whose name starts with the literal prefix "itv014945_".
# In a LIKE pattern a bare `_` matches ANY single character, so it is escaped
# with a backslash (the default LIKE escape character) to match a real
# underscore; `%` still matches any run of trailing characters.
spark.sql("show databases").filter(r"namespace like 'itv014945\_%'").show()

+------------+
|   namespace|
+------------+
|itv014945_db|
+------------+



### To list the tables in the current database

In [6]:
# Print up to five tables from the database that is currently selected.
(
    spark
    .sql("show tables")
    .show(5)
)

+--------+--------------+-----------+
|database|     tableName|isTemporary|
+--------+--------------+-----------+
| default|         1htab|      false|
| default|41group_movies|      false|
| default| 4group_movies|      false|
| default|          4tab|      false|
| default| 6_flags_simon|      false|
+--------+--------------+-----------+
only showing top 5 rows



### To switch to your database and list its tables

In [7]:
# Make itv014945_db the current database for the statements that follow.
use_db_stmt = "use itv014945_db"
spark.sql(use_db_stmt)

In [8]:
# List up to five tables of the database selected by the preceding `use`.
tables_df = spark.sql("show tables")
tables_df.show(5)

+------------+-----------+-----------+
|    database|  tableName|isTemporary|
+------------+-----------+-----------+
|itv014945_db|     orders|      false|
|itv014945_db| orders_ext|      false|
|itv014945_db|sampletable|      false|
+------------+-----------+-----------+



In [9]:
# Define a two-column table (id, name); `if not exists` makes re-runs a no-op.
create_sample_table_stmt = (
    "create table if not exists itv014945_db.sampleTable(id integer, name string)"
)
spark.sql(create_sample_table_stmt)

In [10]:
# Re-list the tables (up to five) after the create statement.
current_tables = spark.sql("show tables")
current_tables.show(5)

+------------+-----------+-----------+
|    database|  tableName|isTemporary|
+------------+-----------+-----------+
|itv014945_db|     orders|      false|
|itv014945_db| orders_ext|      false|
|itv014945_db|sampletable|      false|
+------------+-----------+-----------+



In [28]:
# Append two demo rows to the sample table.
# NOTE(review): `insert into` appends, so re-running this cell adds the same rows again.
insert_rows_stmt = "insert into itv014945_db.sampleTable values(1,'Big Data'),(2,'Spark')"
spark.sql(insert_rows_stmt)

In [29]:
# Print the rows of the sample table explicitly.
# Without .show() the cell only yields a DataFrame object, and whether its rows
# are rendered depends on notebook/eager-evaluation settings that this notebook
# does not configure itself; .show() matches the other query cells and always
# prints the data.
spark.sql("select * from itv014945_db.sampleTable").show()

id,name
1,Big Data
2,Spark


In [13]:
# Show the column names, types and comments of the sample table.
sample_schema = spark.sql("describe table itv014945_db.sampleTable")
sample_schema.show()

+--------+---------+-------+
|col_name|data_type|comment|
+--------+---------+-------+
|      id|      int|   null|
|    name|   string|   null|
+--------+---------+-------+



In [14]:
# Columns plus the detailed table information; truncate=False keeps long values intact.
extended_info = spark.sql("describe extended itv014945_db.sampleTable")
extended_info.show(truncate=False)

+----------------------------+----------------------------------------------------------------------------------+-------+
|col_name                    |data_type                                                                         |comment|
+----------------------------+----------------------------------------------------------------------------------+-------+
|id                          |int                                                                               |null   |
|name                        |string                                                                            |null   |
|                            |                                                                                  |       |
|# Detailed Table Information|                                                                                  |       |
|Database                    |itv014945_db                                                                      |       |
|Table                  

In [15]:
# Same inspection using the `formatted` keyword; values are printed untruncated.
formatted_info = spark.sql("describe formatted itv014945_db.sampleTable")
formatted_info.show(truncate=False)

+----------------------------+----------------------------------------------------------------------------------+-------+
|col_name                    |data_type                                                                         |comment|
+----------------------------+----------------------------------------------------------------------------------+-------+
|id                          |int                                                                               |null   |
|name                        |string                                                                            |null   |
|                            |                                                                                  |       |
|# Detailed Table Information|                                                                                  |       |
|Database                    |itv014945_db                                                                      |       |
|Table                  

In [23]:
# Register a CSV-backed table over the existing orders file at the given location.
# `if not exists` makes the cell re-runnable: without it the statement fails as
# soon as orders_ext is already present in the database (as the earlier table
# listing in this notebook shows it is).
spark.sql("""
    create table if not exists itv014945_db.orders_ext (
        order_id integer,
        order_date string,
        customer_id integer,
        order_status string
    )
    using csv
    location '/public/trendytech/orders/orders.csv'
""")

In [24]:
# Inspect the columns and detailed table information of orders_ext, untruncated.
orders_ext_info = spark.sql("describe formatted itv014945_db.orders_ext")
orders_ext_info.show(truncate=False)

+----------------------------+-----------------------------------------------------------------+-------+
|col_name                    |data_type                                                        |comment|
+----------------------------+-----------------------------------------------------------------+-------+
|order_id                    |int                                                              |null   |
|order_date                  |string                                                           |null   |
|customer_id                 |int                                                              |null   |
|order_status                |string                                                           |null   |
|                            |                                                                 |       |
|# Detailed Table Information|                                                                 |       |
|Database                    |itv014945_db             

In [26]:
# Preview five rows read through the CSV-backed table, without truncating values.
orders_preview = spark.sql("select * from itv014945_db.orders_ext limit 5")
orders_preview.show(truncate=False)

+--------+---------------------+-----------+---------------+
|order_id|order_date           |customer_id|order_status   |
+--------+---------------------+-----------+---------------+
|1       |2013-07-25 00:00:00.0|11599      |CLOSED         |
|2       |2013-07-25 00:00:00.0|256        |PENDING_PAYMENT|
|3       |2013-07-25 00:00:00.0|12111      |COMPLETE       |
|4       |2013-07-25 00:00:00.0|8827       |CLOSED         |
|5       |2013-07-25 00:00:00.0|11318      |COMPLETE       |
+--------+---------------------+-----------+---------------+

