In [2]:
from pyspark.sql import Row, SparkSession

# Reuse the active Spark session if one exists; otherwise start one
# under the application name "SparkSQLExample".
spark = (
    SparkSession.builder
    .appName("SparkSQLExample")
    .getOrCreate()
)

# Sample employee data: one Row per employee. The keyword names used here
# become the DataFrame column names.
data = [
    Row(EmpID=101, Name="Ravi", Department="Sales", Salary=50000),
    Row(EmpID=102, Name="Sneha", Department="Engineering", Salary=80000),
    Row(EmpID=103, Name="Kabir", Department="HR", Salary=45000),
    Row(EmpID=104, Name="Anita", Department="Engineering", Salary=85000),
    Row(EmpID=105, Name="Amit", Department="Sales", Salary=55000),
]

In [3]:
# Build the DataFrame from the Row records. No explicit schema is passed,
# so Spark derives it from the Row fields.
df = spark.createDataFrame(data)

# Print the rows, then register the DataFrame as the session-scoped
# temporary view "employees" so it can be queried with spark.sql().
df.show()
df.createOrReplaceTempView("employees")

+-----+-----+-----------+------+
|EmpID| Name| Department|Salary|
+-----+-----+-----------+------+
|  101| Ravi|      Sales| 50000|
|  102|Sneha|Engineering| 80000|
|  103|Kabir|         HR| 45000|
|  104|Anita|Engineering| 85000|
|  105| Amit|      Sales| 55000|
+-----+-----+-----------+------+



In [5]:
# Every row and column of the "employees" temp view.
(
    spark
    .sql("select * from employees")
    .show()
)

+-----+-----+-----------+------+
|EmpID| Name| Department|Salary|
+-----+-----+-----------+------+
|  101| Ravi|      Sales| 50000|
|  102|Sneha|Engineering| 80000|
|  103|Kabir|         HR| 45000|
|  104|Anita|Engineering| 85000|
|  105| Amit|      Sales| 55000|
+-----+-----+-----------+------+



In [6]:
# Name and salary of each employee whose salary is above 60000.
high_earners = spark.sql("select Name,Salary from employees where salary>60000")
high_earners.show()

+-----+------+
| Name|Salary|
+-----+------+
|Sneha| 80000|
|Anita| 85000|
+-----+------+



In [7]:
# Average salary per department, exposed as the column "avg_salary".
avg_salary_query = """
select department,avg(salary) as avg_salary
from employees
group by department
"""
spark.sql(avg_salary_query).show()

+-----------+----------+
| department|avg_salary|
+-----------+----------+
|      Sales|   52500.0|
|Engineering|   82500.0|
|         HR|   45000.0|
+-----------+----------+



In [8]:
# All employees, sorted by salary from highest to lowest.
(
    spark
    .sql("select * from employees order by salary desc")
    .show()
)

+-----+-----+-----------+------+
|EmpID| Name| Department|Salary|
+-----+-----+-----------+------+
|  104|Anita|Engineering| 85000|
|  102|Sneha|Engineering| 80000|
|  105| Amit|      Sales| 55000|
|  101| Ravi|      Sales| 50000|
|  103|Kabir|         HR| 45000|
+-----+-----+-----------+------+



A global temporary view can also be accessed from other sessions of the same Spark application.

In [9]:
# Register the same DataFrame as a global temporary view, creating it or
# replacing any existing view named "employees_global".
df.createOrReplaceGlobalTempView(name="employees_global")

In [10]:
# The global view is addressed through the "global_temp" qualifier.
employees_global_rows = spark.sql("select * from global_temp.employees_global")
employees_global_rows.show()

+-----+-----+-----------+------+
|EmpID| Name| Department|Salary|
+-----+-----+-----------+------+
|  101| Ravi|      Sales| 50000|
|  102|Sneha|Engineering| 80000|
|  103|Kabir|         HR| 45000|
|  104|Anita|Engineering| 85000|
|  105| Amit|      Sales| 55000|
+-----+-----+-----------+------+



To access the global view from another session, qualify its name with the `global_temp` database: **`global_temp.<view_name>`**

In [11]:
# SparkSession.builder...getOrCreate() returns the session that is already
# active in this kernel, so it would just be another handle to `spark` and
# would not demonstrate cross-session access. newSession() creates a separate
# session (its own temporary views and SQL configuration) on the same
# SparkContext, which is the scope across which global temp views are shared.
new_spark = spark.newSession()

# The global view is visible from the second session via the global_temp prefix.
new_spark.sql("select * from global_temp.employees_global").show()

+-----+-----+-----------+------+
|EmpID| Name| Department|Salary|
+-----+-----+-----------+------+
|  101| Ravi|      Sales| 50000|
|  102|Sneha|Engineering| 80000|
|  103|Kabir|         HR| 45000|
|  104|Anita|Engineering| 85000|
|  105| Amit|      Sales| 55000|
+-----+-----+-----------+------+

