In [1]:
# findspark locates the Spark installation and adds PySpark to sys.path, so it
# has to run *before* pyspark is imported; importing pyspark first would raise
# ImportError on machines where PySpark is only reachable through findspark.
import findspark
findspark.init()

import pyspark

In [2]:

from pyspark.sql import SparkSession

# Get the notebook's Spark session (getOrCreate() returns the active session
# if one already exists), labelled with this notebook's application name.
spark = (
    SparkSession.builder
    .appName("Renaming Columns in data frame")
    .getOrCreate()
)
# Last expression of the cell: display the session summary.
spark

In [3]:
# Sample employee records: (name, date of birth as an ISO-formatted string,
# gender, salary).
# The `spark` session created in the previous cell is reused here; importing
# SparkSession and calling getOrCreate() a second time was redundant.
data = [
    ("Ram", "2002-09-03", "M", 65000),
    ("Raju", "1980-08-12", "M", 78000),
    ("Dhana Laxmi", "1992-12-12", "F", 67000),
    ("Shivathmika", "2005-03-05", "F", 80000),
    ("Mona", "2002-08-01", "F", 90000),
]

# Column names, listed in the same order as the fields of each tuple above.
columns = ["Name", "DOB", "Gender", "Salary"]

# Only names are supplied as the schema, so Spark infers each column's type
# from the values.
df = spark.createDataFrame(data=data, schema=columns)
df.show()

+-----------+----------+------+------+
|       Name|       DOB|Gender|Salary|
+-----------+----------+------+------+
|        Ram|2002-09-03|     M| 65000|
|       Raju|1980-08-12|     M| 78000|
|Dhana Laxmi|1992-12-12|     F| 67000|
|Shivathmika|2005-03-05|     F| 80000|
|       Mona|2002-08-01|     F| 90000|
+-----------+----------+------+------+



In [4]:
# Method 1: withColumnRenamed(existing, new)
# Rename DOB -> DateOfBirth. The call returns a new DataFrame; `df` itself keeps
# its original column names, which is why the following cells still see DOB.
df.withColumnRenamed(existing="DOB", new="DateOfBirth").show()


+-----------+-----------+------+------+
|       Name|DateOfBirth|Gender|Salary|
+-----------+-----------+------+------+
|        Ram| 2002-09-03|     M| 65000|
|       Raju| 1980-08-12|     M| 78000|
|Dhana Laxmi| 1992-12-12|     F| 67000|
|Shivathmika| 2005-03-05|     F| 80000|
|       Mona| 2002-08-01|     F| 90000|
+-----------+-----------+------+------+



In [5]:
# Rename several columns by chaining one withColumnRenamed() call per column:
# Gender -> Sex, then Salary -> Amount.
(
    df
    .withColumnRenamed("Gender", "Sex")
    .withColumnRenamed("Salary", "Amount")
    .show()
)

+-----------+----------+---+------+
|       Name|       DOB|Sex|Amount|
+-----------+----------+---+------+
|        Ram|2002-09-03|  M| 65000|
|       Raju|1980-08-12|  M| 78000|
|Dhana Laxmi|1992-12-12|  F| 67000|
|Shivathmika|2005-03-05|  F| 80000|
|       Mona|2002-08-01|  F| 90000|
+-----------+----------+---+------+



In [6]:
# Method 2: selectExpr() with SQL-style "<column> as <alias>" expressions.
# Only Name is aliased (to lowercase "name"); the other columns are selected
# unchanged.
df.selectExpr(
    "Name as name",
    "DOB",
    "Gender",
    "Salary",
).show()

+-----------+----------+------+------+
|       name|       DOB|Gender|Salary|
+-----------+----------+------+------+
|        Ram|2002-09-03|     M| 65000|
|       Raju|1980-08-12|     M| 78000|
|Dhana Laxmi|1992-12-12|     F| 67000|
|Shivathmika|2005-03-05|     F| 80000|
|       Mona|2002-08-01|     F| 90000|
+-----------+----------+------+------+



In [7]:
# Method 3: select() with col(...).alias(...)
# Rename Salary to Amount and keep the remaining columns as they are.
from pyspark.sql.functions import col

# The result gets its own name so that `data` -- the list of raw rows used to
# build `df` -- is not silently rebound to a DataFrame.
df_amount = df.select(
    col("Name"),
    col("DOB"),
    col("Gender"),
    col("Salary").alias("Amount"),
)
df_amount.show()


+-----------+----------+------+------+
|       Name|       DOB|Gender|Amount|
+-----------+----------+------+------+
|        Ram|2002-09-03|     M| 65000|
|       Raju|1980-08-12|     M| 78000|
|Dhana Laxmi|1992-12-12|     F| 67000|
|Shivathmika|2005-03-05|     F| 80000|
|       Mona|2002-08-01|     F| 90000|
+-----------+----------+------+------+



In [8]:
# Method 4: toDF(*names)
# Build an ordered list of new column names -- one per existing column, in the
# same order -- and unpack it into toDF() to rename every column in one call.
data_list = [
    "Emp Name",
    "Birth date",
    "M/F",
    "Paid salary",
]
new_df = df.toDF(*data_list)
new_df.show()

+-----------+----------+---+-----------+
|   Emp Name|Birth date|M/F|Paid salary|
+-----------+----------+---+-----------+
|        Ram|2002-09-03|  M|      65000|
|       Raju|1980-08-12|  M|      78000|
|Dhana Laxmi|1992-12-12|  F|      67000|
|Shivathmika|2005-03-05|  F|      80000|
|       Mona|2002-08-01|  F|      90000|
+-----------+----------+---+-----------+



In [9]:
# Renaming only some columns with toDF(): the names are still passed
# positionally for all four columns, so the columns to keep (Name, Salary) are
# listed under their current names and only DOB and Gender get new ones.
data_list = [
    "Name",
    "Birth Date",
    "Sex",
    "Salary",
]
new_df = df.toDF(*data_list)
new_df.show()

+-----------+----------+---+------+
|       Name|Birth Date|Sex|Salary|
+-----------+----------+---+------+
|        Ram|2002-09-03|  M| 65000|
|       Raju|1980-08-12|  M| 78000|
|Dhana Laxmi|1992-12-12|  F| 67000|
|Shivathmika|2005-03-05|  F| 80000|
|       Mona|2002-08-01|  F| 90000|
+-----------+----------+---+------+

