# Initializing SparkSession for SQL Operations in PySpark

In [1]:
from pyspark.sql import SparkSession

# Build the SparkSession used by every cell below, or reuse one that is
# already running in this kernel.
spark = (
    SparkSession.builder
    .appName("SparkSQLExample")
    .getOrCreate()
)

In [3]:
from pyspark.sql import Row

# Row factory: the field names are declared once, values are then passed
# positionally in the same order.
Employee = Row("EmpID", "Name", "Department", "Salary")

# Sample employee records
data = [
    Employee(101, "Ravi", "Sales", 50000),
    Employee(102, "Sneha", "Engineering", 80000),
    Employee(103, "Kabir", "HR", 45000),
    Employee(104, "Anita", "Engineering", 85000),
    Employee(105, "Amit", "Sales", 55000),
]

# No explicit schema is given, so Spark infers it from the Row objects.
df = spark.createDataFrame(data)
df.show()

+-----+-----+-----------+------+
|EmpID| Name| Department|Salary|
+-----+-----+-----------+------+
|  101| Ravi|      Sales| 50000|
|  102|Sneha|Engineering| 80000|
|  103|Kabir|         HR| 45000|
|  104|Anita|Engineering| 85000|
|  105| Amit|      Sales| 55000|
+-----+-----+-----------+------+



In [4]:
# Register the DataFrame under the name "employees" so the spark.sql(...)
# queries in the following cells can refer to it as a table.
df.createOrReplaceTempView("employees")

In [5]:
# Read every row back through the SQL interface to confirm the view works.
all_employees = spark.sql("SELECT * FROM employees")
all_employees.show()

+-----+-----+-----------+------+
|EmpID| Name| Department|Salary|
+-----+-----+-----------+------+
|  101| Ravi|      Sales| 50000|
|  102|Sneha|Engineering| 80000|
|  103|Kabir|         HR| 45000|
|  104|Anita|Engineering| 85000|
|  105| Amit|      Sales| 55000|
+-----+-----+-----------+------+



In [6]:
# Employees earning more than 60000.
# The column is referenced with its declared casing ("Salary"): the lower-case
# spelling only resolves while spark.sql.caseSensitive is left at its default
# (false), and every other query in this notebook already uses "Salary".
spark.sql("SELECT Name, Salary FROM employees WHERE Salary > 60000").show()

+-----+------+
| Name|Salary|
+-----+------+
|Sneha| 80000|
|Anita| 85000|
+-----+------+



In [7]:
# Average salary per department. The adjacent string literals are joined by
# Python into a single one-line SQL statement.
avg_salary_sql = (
    "SELECT Department, AVG(Salary) AS AvgSalary "
    "FROM employees "
    "GROUP BY Department"
)
spark.sql(avg_salary_sql).show()

+-----------+---------+
| Department|AvgSalary|
+-----------+---------+
|      Sales|  52500.0|
|Engineering|  82500.0|
|         HR|  45000.0|
+-----------+---------+



In [8]:
# List all employees from the highest salary to the lowest.
employees_by_salary = spark.sql("SELECT * FROM employees ORDER BY Salary DESC")
employees_by_salary.show()

+-----+-----+-----------+------+
|EmpID| Name| Department|Salary|
+-----+-----+-----------+------+
|  104|Anita|Engineering| 85000|
|  102|Sneha|Engineering| 80000|
|  105| Amit|      Sales| 55000|
|  101| Ravi|      Sales| 50000|
|  103|Kabir|         HR| 45000|
+-----+-----+-----------+------+



# Accessing Global Temporary View Across Spark Sessions

In [13]:
# Register the DataFrame as a global temporary view named "global_employees".
# This cell only creates the view; the cells below query it through the
# `global_temp` qualifier.
df.createOrReplaceGlobalTempView("global_employees")

In [14]:
# Query the global view from the session that created it; the view has to be
# addressed with the `global_temp.` prefix.
high_earners_global = spark.sql("SELECT * FROM global_temp.global_employees WHERE Salary > 60000")
high_earners_global.show()

+-----+-----+-----------+------+
|EmpID| Name| Department|Salary|
+-----+-----+-----------+------+
|  102|Sneha|Engineering| 80000|
|  104|Anita|Engineering| 85000|
+-----+-----+-----------+------+



In [15]:
# Create a genuinely separate Spark session.
# SparkSession.builder...getOrCreate() would simply hand back the session that
# is already active in this kernel (i.e. the same object as `spark`), so it
# would not demonstrate cross-session access. newSession() returns a distinct
# session with its own SQL configuration and its own temporary views, while
# sharing the underlying SparkContext.
new_spark = spark.newSession()

# Accessing the global view from the new session: global temporary views are
# shared between sessions of the same application, so this succeeds even
# though the session-scoped "employees" view is not visible from `new_spark`.
new_spark.sql("SELECT Name FROM global_temp.global_employees").show()

+-----+
| Name|
+-----+
| Ravi|
|Sneha|
|Kabir|
|Anita|
| Amit|
+-----+

