In [3]:
from pyspark.sql import SparkSession

# Build (or reuse) the notebook's SparkSession. Hive support is requested so
# that tables saved later are registered through the metastore catalog.
spark = (
    SparkSession.builder
    .appName('Spark Tables')
    .enableHiveSupport()
    .getOrCreate()
)

In [5]:
# List the HDFS /data/ directory to confirm the input CSV files are present.
!hadoop fs -ls /data/

Found 3 items
-rw-r--r--   2 root hadoop       5488 2025-12-20 19:02 /data/customers_100.csv
-rw-r--r--   2 root hadoop        210 2025-12-22 13:34 /data/dates_data.csv
drwxr-xr-x   - root hadoop          0 2025-12-20 20:26 /data/write_output.csv


In [8]:
# Load the customers CSV. The first row supplies the column names, and the
# column types are inferred from the data rather than all read as strings.
customers_reader = (
    spark.read
    .format('csv')
    .option('header', 'True')
    .option('inferSchema', 'True')
)
df = customers_reader.load('/data/customers_100.csv')

In [9]:
# Preview the loaded rows to check the header and the parsed values.
df.show()

+-----------+-----------+---------+-----------+-------+-----------------+---------+
|customer_id|       name|     city|      state|country|registration_date|is_active|
+-----------+-----------+---------+-----------+-------+-----------------+---------+
|          0| Customer_0|     Pune|Maharashtra|  India|       2023-06-29|    false|
|          1| Customer_1|Bangalore| Tamil Nadu|  India|       2023-12-07|     true|
|          2| Customer_2|Hyderabad|    Gujarat|  India|       2023-10-27|     true|
|          3| Customer_3|Bangalore|  Karnataka|  India|       2023-10-17|    false|
|          4| Customer_4|Ahmedabad|  Karnataka|  India|       2023-03-14|    false|
|          5| Customer_5|Hyderabad|  Karnataka|  India|       2023-07-28|    false|
|          6| Customer_6|     Pune|      Delhi|  India|       2023-08-29|    false|
|          7| Customer_7|Ahmedabad|West Bengal|  India|       2023-12-28|     true|
|          8| Customer_8|     Pune|  Karnataka|  India|       2023-06-22|   

In [15]:
# Baseline: list the catalog before any view or table has been registered
# (the recorded output is empty).
tables_before = spark.sql('show tables')
tables_before.show()

+---------+---------+-----------+
|namespace|tableName|isTemporary|
+---------+---------+-----------+
+---------+---------+-----------+



In [18]:
# Register df as a temporary view named 'temp_cust' so it can be queried with SQL.
# The view is visible only from this session: the listing made later from
# spark_new does not include it.
df.createOrReplaceTempView('temp_cust')

In [19]:
# temp_cust should now be listed with isTemporary = true and an empty namespace.
tables_after_temp_view = spark.sql('show tables')
tables_after_temp_view.show()

+---------+---------+-----------+
|namespace|tableName|isTemporary|
+---------+---------+-----------+
|         |temp_cust|       true|
+---------+---------+-----------+



In [20]:
# Query the temporary view with plain SQL, limited to five rows.
temp_cust_sample = spark.sql('select * from temp_cust limit 5')
temp_cust_sample.show()

+-----------+----------+---------+-----------+-------+-----------------+---------+
|customer_id|      name|     city|      state|country|registration_date|is_active|
+-----------+----------+---------+-----------+-------+-----------------+---------+
|          0|Customer_0|     Pune|Maharashtra|  India|       2023-06-29|    false|
|          1|Customer_1|Bangalore| Tamil Nadu|  India|       2023-12-07|     true|
|          2|Customer_2|Hyderabad|    Gujarat|  India|       2023-10-27|     true|
|          3|Customer_3|Bangalore|  Karnataka|  India|       2023-10-17|    false|
|          4|Customer_4|Ahmedabad|  Karnataka|  India|       2023-03-14|    false|
+-----------+----------+---------+-----------+-------+-----------------+---------+



In [None]:
# Register the same DataFrame as a global temporary view. It reuses the name
# 'temp_cust' but is addressed through the global_temp namespace, so it
# coexists with the session-local view of the same name.
df.createOrReplaceGlobalTempView('temp_cust')

In [21]:
# List what is visible in the global_temp namespace. The recorded output also
# includes the session-local temp view (the row with an empty namespace).
global_views = spark.sql('show tables in global_temp')
global_views.show()

+-----------+---------+-----------+
|  namespace|tableName|isTemporary|
+-----------+---------+-----------+
|global_temp|temp_cust|       true|
|           |temp_cust|       true|
+-----------+---------+-----------+



In [22]:
# A global temporary view must be referenced with its global_temp qualifier.
global_cust_sample = spark.sql('select * from global_temp.temp_cust limit 5')
global_cust_sample.show()

+-----------+----------+---------+-----------+-------+-----------------+---------+
|customer_id|      name|     city|      state|country|registration_date|is_active|
+-----------+----------+---------+-----------+-------+-----------------+---------+
|          0|Customer_0|     Pune|Maharashtra|  India|       2023-06-29|    false|
|          1|Customer_1|Bangalore| Tamil Nadu|  India|       2023-12-07|     true|
|          2|Customer_2|Hyderabad|    Gujarat|  India|       2023-10-27|     true|
|          3|Customer_3|Bangalore|  Karnataka|  India|       2023-10-17|    false|
|          4|Customer_4|Ahmedabad|  Karnataka|  India|       2023-03-14|    false|
+-----------+----------+---------+-----------+-------+-----------------+---------+



In [23]:
# global_temp.temp_cust is a view, not a table, so remove it with DROP VIEW.
# IF EXISTS keeps this cell re-runnable after the view has already been dropped.
spark.sql('drop view if exists global_temp.temp_cust')

DataFrame[]

In [24]:
# Confirm the drop: only the session-local temp view should remain listed.
global_views_after_drop = spark.sql('show tables in global_temp')
global_views_after_drop.show()

+---------+---------+-----------+
|namespace|tableName|isTemporary|
+---------+---------+-----------+
|         |temp_cust|       true|
+---------+---------+-----------+



In [25]:
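# Persistent table: unlike the temporary views above, a table written with saveAsTable is registered in the catalog (namespace `default`, isTemporary = false).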

In [26]:
 # Save df as the catalog table 'cust_pers'. mode('overwrite') replaces an
 # existing table of that name, so the cell can be re-run.
 df.write.mode('overwrite').saveAsTable('cust_pers')

25/12/23 21:44:57 WARN SessionState: METASTORE_FILTER_HOOK will be ignored, since hive.security.authorization.manager is set to instance of HiveAuthorizerFactory.


In [28]:
# cust_pers should now appear in the default namespace next to the temp view.
tables_after_save = spark.sql('show tables')
tables_after_save.show()

+---------+---------+-----------+
|namespace|tableName|isTemporary|
+---------+---------+-----------+
|  default|cust_pers|      false|
|         |temp_cust|       true|
+---------+---------+-----------+



In [29]:
# Create a second session from the existing one, to compare which catalog
# entries are visible from each session.
spark_new = spark.newSession()

In [42]:
# NOTE(review): the recorded AttributeError is presumably an execution-order
# artifact. This cell's execution count (42) is later than the
# spark_new.stop() cell (41), so it ran against an already-stopped session.
# TODO confirm by re-running the notebook top to bottom.
spark_new.sparkContext.applicationId

AttributeError: 'NoneType' object has no attribute 'sc'

In [32]:
# Application id of the original session, for comparison with spark_new.
spark.sparkContext.applicationId

'application_1766520098500_0012'

In [40]:
# From the new session, the persistent table is listed but the temp view
# registered on the original session is not (per the recorded output).
new_session_tables = spark_new.sql('show tables')
new_session_tables.show()

+---------+---------+-----------+
|namespace|tableName|isTemporary|
+---------+---------+-----------+
|  default|cust_pers|      false|
+---------+---------+-----------+



In [35]:
# Same listing from the original session, which still sees its own temp view.
original_session_tables = spark.sql('show tables')
original_session_tables.show()

+---------+---------+-----------+
|namespace|tableName|isTemporary|
+---------+---------+-----------+
|  default|cust_pers|      false|
|         |temp_cust|       true|
+---------+---------+-----------+



In [36]:
# Column names and data types of the saved table.
cust_pers_schema = spark.sql('describe cust_pers')
cust_pers_schema.show()

+-----------------+---------+-------+
|         col_name|data_type|comment|
+-----------------+---------+-------+
|      customer_id|      int|   NULL|
|             name|   string|   NULL|
|             city|   string|   NULL|
|            state|   string|   NULL|
|          country|   string|   NULL|
|registration_date|     date|   NULL|
|        is_active|  boolean|   NULL|
+-----------------+---------+-------+



In [38]:
# The extended description appends a detailed table information section after
# the columns. truncate=False keeps long values from being cut off.
cust_pers_details = spark.sql('describe extended cust_pers')
cust_pers_details.show(truncate=False)

+----------------------------+-------------------------------------------+-------+
|col_name                    |data_type                                  |comment|
+----------------------------+-------------------------------------------+-------+
|customer_id                 |int                                        |NULL   |
|name                        |string                                     |NULL   |
|city                        |string                                     |NULL   |
|state                       |string                                     |NULL   |
|country                     |string                                     |NULL   |
|registration_date           |date                                       |NULL   |
|is_active                   |boolean                                    |NULL   |
|                            |                                           |       |
|# Detailed Table Information|                                           |       |
|Cat

In [41]:
# NOTE(review): newSession() is documented to share the SparkContext with its
# parent session, so stopping spark_new presumably shuts down the context used
# by `spark` as well. Confirm before adding cells after this one.
spark_new.stop()

In [43]:
# Stop the original session at the end of the notebook.
spark.stop()