### Create DataFrame

In [0]:
# Sample employee records; the field order matches employee_schema below.
# doj and employee_dept_id are given as strings, salary as an integer,
# and the record for employee 30 has no gender (None).
employee_data = [
    (10, "Srishti", "Shetty", "1998", "100", "F", 8000),
    (20, "Aish", "Rai", "2002", "200", "M", 2000),
    (30, "Rishabh", "Alva", "2010", "100", None, 6000),
    (40, "Aarvi", "Bhandhary", "1996", "400", "F", 7000),
    (50, "Pooja", "Hegde", "2008", "500", "F", 5000),
    (60, "Sunil", "Shetty", "1997", "400", "M", 3000),
    (70, "Sid", "Rai", "2010", "600", "M", 5000),
]

# Only column names are supplied here, no explicit column types.
employee_schema = [
    "employee_id",
    "first_name",
    "last_name",
    "doj",
    "employee_dept_id",
    "gender",
    "salary",
]

# Build the base DataFrame that the following cells start from, then render it.
empDF = spark.createDataFrame(employee_data, employee_schema)
display(empDF)

employee_id,first_name,last_name,doj,employee_dept_id,gender,salary
10,Srishti,Shetty,1998,100,F,8000
20,Aish,Rai,2002,200,M,2000
30,Rishabh,Alva,2010,100,,6000
40,Aarvi,Bhandhary,1996,400,F,7000
50,Pooja,Hegde,2008,500,F,5000
60,Sunil,Shetty,1997,400,M,3000
70,Sid,Rai,2010,600,M,5000


### Add New Column Using Constant Literal

In [0]:
from pyspark.sql.functions import lit

# withColumn expects a Column expression, so the constant string is wrapped in lit().
location_col = lit("Mumbai MH")

# Adds "Location" to a new DataFrame; empDF itself is left as it was.
empDF_AddColumn = empDF.withColumn("Location", location_col)
empDF_AddColumn.show()

+-----------+----------+---------+----+----------------+------+------+---------+
|employee_id|first_name|last_name| doj|employee_dept_id|gender|salary| Location|
+-----------+----------+---------+----+----------------+------+------+---------+
|         10|   Srishti|   Shetty|1998|             100|     F|  8000|Mumbai MH|
|         20|      Aish|      Rai|2002|             200|     M|  2000|Mumbai MH|
|         30|   Rishabh|     Alva|2010|             100|  null|  6000|Mumbai MH|
|         40|     Aarvi|Bhandhary|1996|             400|     F|  7000|Mumbai MH|
|         50|     Pooja|    Hegde|2008|             500|     F|  5000|Mumbai MH|
|         60|     Sunil|   Shetty|1997|             400|     M|  3000|Mumbai MH|
|         70|       Sid|      Rai|2010|             600|     M|  5000|Mumbai MH|
+-----------+----------+---------+----+----------------+------+------+---------+



### Add a New Column By Calculation

In [0]:
# Import lit alongside concat so this cell does not depend on an import
# made in a different cell.
from pyspark.sql.functions import concat, lit

# Starts again from empDF (not from the previous empDF_AddColumn) and adds:
#   Bonus - salary multiplied by 0.1
#   Name  - first_name and last_name joined with a single space
empDF_AddColumn = (
    empDF
    .withColumn("Bonus", empDF.salary * 0.1)
    .withColumn("Name", concat("first_name", lit(" "), "last_name"))
)
empDF_AddColumn.show()

+-----------+----------+---------+----+----------------+------+------+-----+---------------+
|employee_id|first_name|last_name| doj|employee_dept_id|gender|salary|Bonus|           Name|
+-----------+----------+---------+----+----------------+------+------+-----+---------------+
|         10|   Srishti|   Shetty|1998|             100|     F|  8000|800.0| Srishti Shetty|
|         20|      Aish|      Rai|2002|             200|     M|  2000|200.0|       Aish Rai|
|         30|   Rishabh|     Alva|2010|             100|  null|  6000|600.0|   Rishabh Alva|
|         40|     Aarvi|Bhandhary|1996|             400|     F|  7000|700.0|Aarvi Bhandhary|
|         50|     Pooja|    Hegde|2008|             500|     F|  5000|500.0|    Pooja Hegde|
|         60|     Sunil|   Shetty|1997|             400|     M|  3000|300.0|   Sunil Shetty|
|         70|       Sid|      Rai|2010|             600|     M|  5000|500.0|        Sid Rai|
+-----------+----------+---------+----+----------------+------+------+-----+---------------+

### Rename a Column

In [0]:
# Rename two columns by chaining one withColumnRenamed call per column.
# The result is a new DataFrame; empDF_AddColumn keeps its original column names.
empDF_RenameColumn = (
    empDF_AddColumn
    .withColumnRenamed("Name", "Full_Name")
    .withColumnRenamed("doj", "Date_Of_Joining")
)
empDF_RenameColumn.show()

+-----------+----------+---------+---------------+----------------+------+------+-----+---------------+
|employee_id|first_name|last_name|Date_Of_Joining|employee_dept_id|gender|salary|Bonus|      Full_Name|
+-----------+----------+---------+---------------+----------------+------+------+-----+---------------+
|         10|   Srishti|   Shetty|           1998|             100|     F|  8000|800.0| Srishti Shetty|
|         20|      Aish|      Rai|           2002|             200|     M|  2000|200.0|       Aish Rai|
|         30|   Rishabh|     Alva|           2010|             100|  null|  6000|600.0|   Rishabh Alva|
|         40|     Aarvi|Bhandhary|           1996|             400|     F|  7000|700.0|Aarvi Bhandhary|
|         50|     Pooja|    Hegde|           2008|             500|     F|  5000|500.0|    Pooja Hegde|
|         60|     Sunil|   Shetty|           1997|             400|     M|  3000|300.0|   Sunil Shetty|
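|         70|       Sid|      Rai|           2010|             600|     M|  5000|500.0|        Sid Rai|
+-----------+----------+---------+---------------+----------------+------+------+-----+---------------+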

### Drop a Column

In [0]:
# drop() returns a new DataFrame without the named column. Keep that DataFrame
# in the variable and call show() as a separate statement: show() only prints
# the rows and returns None, so chaining it into the assignment would store None.
empDF_DropColumn = empDF_AddColumn.drop("Name")
empDF_DropColumn.show()

# To drop more than one column, pass each column name as its own argument:
# empDF_DropColumn = empDF_AddColumn.drop("Col1", "Col2", "Col3")

+-----------+----------+---------+----+----------------+------+------+-----+
|employee_id|first_name|last_name| doj|employee_dept_id|gender|salary|Bonus|
+-----------+----------+---------+----+----------------+------+------+-----+
|         10|   Srishti|   Shetty|1998|             100|     F|  8000|800.0|
|         20|      Aish|      Rai|2002|             200|     M|  2000|200.0|
|         30|   Rishabh|     Alva|2010|             100|  null|  6000|600.0|
|         40|     Aarvi|Bhandhary|1996|             400|     F|  7000|700.0|
|         50|     Pooja|    Hegde|2008|             500|     F|  5000|500.0|
|         60|     Sunil|   Shetty|1997|             400|     M|  3000|300.0|
|         70|       Sid|      Rai|2010|             600|     M|  5000|500.0|
+-----------+----------+---------+----+----------------+------+------+-----+

