### Create a sample DataFrame

In [0]:
# Column names, listed in the same order as the fields of each employee tuple.
employee_schema = ['employee_id', 'name', 'doj', 'employee_dept_id', 'salary']

# One tuple per employee: id, name, date of joining and department id are
# strings; salary is an integer.
employee_data = [
    ('10', 'John Smith', '2022-01-15', 'D01', 75000),
    ('11', 'Maria Garcia', '2021-07-10', 'D02', 82000),
    ('12', 'James Miller', '2020-03-22', 'D03', 91000),
    ('13', 'Linda Johnson', '2019-11-05', 'D01', 67000),
    ('14', 'Robert Brown', '2023-05-30', 'D02', 88000),
]

# Only column names are supplied as the schema (no explicit types).
empDF = spark.createDataFrame(data=employee_data, schema=employee_schema)
display(empDF)

employee_id,name,doj,employee_dept_id,salary
10,John Smith,2022-01-15,D01,75000
11,Maria Garcia,2021-07-10,D02,82000
12,James Miller,2020-03-22,D03,91000
13,Linda Johnson,2019-11-05,D01,67000
14,Robert Brown,2023-05-30,D02,88000


### Define a user-defined function (plain Python helper, not a Spark `udf`) to rename columns

Example 1: prefix every column name with `Col_`

In [0]:
import pyspark.sql.functions as f

def rename_columns(df):
    """
    Prefix every column name of a PySpark DataFrame with "Col_".

    All new names are computed from the input's column list up front and
    applied in a single ``toDF`` call. Renaming one column at a time with
    ``withColumnRenamed`` would re-match names produced by earlier steps:
    with columns ``a`` and ``Col_a``, both would end up as ``Col_Col_a``.

    :param df: Input DataFrame
    :return: DataFrame with the same columns in the same order, each renamed
             to "Col_" + its original name
    """
    prefixed_names = ["Col_" + column for column in df.columns]
    # toDF assigns names by position, so each column is renamed exactly once.
    return df.toDF(*prefixed_names)

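Example 2: rename columns using an old-name → new-name mapping (this redefines `rename_columns` from Example 1)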

In [0]:
def rename_columns(df, columns_map=None):
    """
    Rename columns in a PySpark DataFrame.

    This definition shares its name with the one-argument Example1 helper and
    replaces it once this cell runs. ``columns_map`` is therefore optional, so
    that ``rename_columns(df)`` still prefixes every column with "Col_"
    instead of raising ``TypeError`` when the notebook is run top to bottom.

    :param df: Input DataFrame
    :param columns_map: Dict of old_name: new_name. Renames are applied one
        after another, in dict order. If omitted (None), every column is
        renamed to "Col_" + its current name.
    :return: DataFrame with renamed columns
    """
    if columns_map is None:
        # Build all prefixed names from the original column list and apply
        # them positionally in one step.
        return df.toDF(*["Col_" + column for column in df.columns])

    for old_name, new_name in columns_map.items():
        df = df.withColumnRenamed(old_name, new_name)
    return df

### Execute UDF

Example1

In [0]:
# Call rename_columns with only the DataFrame (no mapping); the rendered
# output shows every column name prefixed with "Col_".
renamed_df = rename_columns(empDF)
display(renamed_df)

Col_employee_id,Col_name,Col_doj,Col_employee_dept_id,Col_salary
10,John Smith,2022-01-15,D01,75000
11,Maria Garcia,2021-07-10,D02,82000
12,James Miller,2020-03-22,D03,91000
13,Linda Johnson,2019-11-05,D01,67000
14,Robert Brown,2023-05-30,D02,88000


Example2

In [0]:
# Old column name -> new column name, one pair per column of empDF.
columns_map = dict(
    [
        ("employee_id", "emp_id"),
        ("name", "full_name"),
        ("doj", "date_of_joining"),
        ("employee_dept_id", "dept_id"),
        ("salary", "annual_salary"),
    ]
)

# Apply the mapping and render the renamed DataFrame.
empDF_renamed = rename_columns(empDF, columns_map)
display(empDF_renamed)

emp_id,full_name,date_of_joining,dept_id,annual_salary
10,John Smith,2022-01-15,D01,75000
11,Maria Garcia,2021-07-10,D02,82000
12,James Miller,2020-03-22,D03,91000
13,Linda Johnson,2019-11-05,D01,67000
14,Robert Brown,2023-05-30,D02,88000


### User-defined function (plain Python helper, not a Spark `udf`) to convert name to uppercase

In [0]:

from pyspark.sql.functions import upper, col
def upperCaseName(df):
    """
    Add an upper-cased copy of the ``name`` column to a PySpark DataFrame.

    :param df: Input DataFrame; must contain a ``name`` column
    :return: DataFrame with an additional ``name_uppercase`` column
    """
    uppercased_name = upper(col('name'))
    return df.withColumn('name_uppercase', uppercased_name)


### Execute UDF

In [0]:
# Add the name_uppercase column to empDF and render the result.
uppercasenameDF = upperCaseName(empDF)
display(uppercasenameDF)

employee_id,name,doj,employee_dept_id,salary,name_uppercase
10,John Smith,2022-01-15,D01,75000,JOHN SMITH
11,Maria Garcia,2021-07-10,D02,82000,MARIA GARCIA
12,James Miller,2020-03-22,D03,91000,JAMES MILLER
13,Linda Johnson,2019-11-05,D01,67000,LINDA JOHNSON
14,Robert Brown,2023-05-30,D02,88000,ROBERT BROWN
