In [1]:
from pyspark.sql import SparkSession

# Obtain the notebook's SparkSession (an already-running one is reused),
# labelling the application "SparkSQLExample".
spark = (
    SparkSession.builder
    .appName("SparkSQLExample")
    .getOrCreate()
)

In [2]:
from pyspark.sql import Row

# Sample employee data. The column names are declared once through a Row
# factory; each record then supplies its values in that same order.
Employee = Row("EmpID", "Name", "Department", "Salary")
data = [
    Employee(101, "Ravi", "Sales", 50000),
    Employee(102, "Sneha", "Engineering", 80000),
    Employee(103, "Kabir", "HR", 45000),
    Employee(104, "Anita", "Engineering", 85000),
    Employee(105, "Amit", "Sales", 55000),
]

# Let Spark infer the schema from the Row fields, then preview the result.
df = spark.createDataFrame(data)
df.show()

+-----+-----+-----------+------+
|EmpID| Name| Department|Salary|
+-----+-----+-----------+------+
|  101| Ravi|      Sales| 50000|
|  102|Sneha|Engineering| 80000|
|  103|Kabir|         HR| 45000|
|  104|Anita|Engineering| 85000|
|  105| Amit|      Sales| 55000|
+-----+-----+-----------+------+



In [3]:
# Expose the DataFrame to Spark SQL under the name "employees"
# (replaces any existing temp view with that name).
df.createOrReplaceTempView("employees")

In [4]:
# Read every row and column back through the "employees" temp view.
all_employees = spark.sql("select * from employees")
all_employees.show()

+-----+-----+-----------+------+
|EmpID| Name| Department|Salary|
+-----+-----+-----------+------+
|  101| Ravi|      Sales| 50000|
|  102|Sneha|Engineering| 80000|
|  103|Kabir|         HR| 45000|
|  104|Anita|Engineering| 85000|
|  105| Amit|      Sales| 55000|
+-----+-----+-----------+------+



In [5]:
# Project name and salary for employees earning more than 60000.
high_earners = spark.sql("select Name, Salary from employees where Salary > 60000")
high_earners.show()

+-----+------+
| Name|Salary|
+-----+------+
|Sneha| 80000|
|Anita| 85000|
+-----+------+



In [7]:
# Aggregate: average salary per department.
avg_salary_by_department = spark.sql(
    "select Department,avg(Salary) as Avg_salary from employees group by Department"
)
avg_salary_by_department.show()

+-----------+----------+
| Department|Avg_salary|
+-----------+----------+
|      Sales|   52500.0|
|Engineering|   82500.0|
|         HR|   45000.0|
+-----------+----------+



In [8]:
# List all employees from highest to lowest salary.
employees_by_salary = spark.sql("select * from employees order by Salary Desc")
employees_by_salary.show()

+-----+-----+-----------+------+
|EmpID| Name| Department|Salary|
+-----+-----+-----------+------+
|  104|Anita|Engineering| 85000|
|  102|Sneha|Engineering| 80000|
|  105| Amit|      Sales| 55000|
|  101| Ravi|      Sales| 50000|
|  103|Kabir|         HR| 45000|
+-----+-----+-----------+------+



In [9]:
# Register the same DataFrame as a *global* temporary view. Queries must
# address it through the `global_temp` database (global_temp.global_employees).
df.createOrReplaceGlobalTempView("global_employees")

In [11]:
# Query the global temp view via its `global_temp` qualifier, keeping only
# rows with a salary above 60000.
global_high_earners = spark.sql(
    "select * from global_temp.global_employees where salary > 60000"
)
global_high_earners.show()

+-----+-----+-----------+------+
|EmpID| Name| Department|Salary|
+-----+-----+-----------+------+
|  102|Sneha|Engineering| 80000|
|  104|Anita|Engineering| 85000|
+-----+-----+-----------+------+



In [12]:
# Demonstrate that a global temp view is visible from a *different* session.
#
# `SparkSession.builder...getOrCreate()` would simply hand back the session
# that is already running, so it cannot be used to prove cross-session
# visibility. `newSession()` creates a sibling session that shares the same
# SparkContext but has its own temporary views and SQL configuration.
new_spark = spark.newSession()

# Run the query through the new session: the session-scoped "employees" view
# is not registered here, but the view under `global_temp` still resolves.
new_spark.sql("select Name, Salary from global_temp.global_employees ").show()

+-----+------+
| Name|Salary|
+-----+------+
| Ravi| 50000|
|Sneha| 80000|
|Kabir| 45000|
|Anita| 85000|
| Amit| 55000|
+-----+------+

