In [0]:
# Import the required libraries
from pyspark.sql.functions import *
from pyspark.sql.types import *

from pyspark.sql.session import SparkSession

# Obtain the Spark session for this notebook (reuses an active one if present).
spark = SparkSession.builder.appName("testing").getOrCreate()

# Sample rows: (ID, Name). Names are delimited by either a hyphen or a space.
data = [
    (1, "Sagar-Prajapati"),
    (2, "Alex-John"),
    (3, "John Cena"),
    (4, "Kim Joe"),
]

# Column names and types, passed to createDataFrame as a single string.
schema = "ID int,Name string"

df = spark.createDataFrame(data=data, schema=schema)
df.show()

+---+---------------+
| ID|           Name|
+---+---------------+
|  1|Sagar-Prajapati|
|  2|      Alex-John|
|  3|      John Cena|
|  4|        Kim Joe|
+---+---------------+



#### Native Approach


In [0]:
# Step 1: unify the delimiter by rewriting every space in Name as a hyphen.
pattern = r"( )"
replace = "-"
name_hyphenated = regexp_replace(col("Name"), pattern, replace)

# Step 2: split on the hyphen; New_Name holds the resulting array of parts.
df_1 = df.withColumn("New_Name", split(name_hyphenated, "-"))

# Step 3: element 0 becomes First_Name, element 1 becomes Last_Name;
# the intermediate array and the original Name column are then dropped.
name_parts = col("New_Name")
df_final = (
    df_1.withColumn("First_Name", name_parts.getItem(0))
    .withColumn("Last_Name", name_parts.getItem(1))
    .drop("New_Name", "Name")
)

+---+---------------+------------------+
| ID|           Name|          New_Name|
+---+---------------+------------------+
|  1|Sagar-Prajapati|[Sagar, Prajapati]|
|  2|      Alex-John|      [Alex, John]|
|  3|      John Cena|      [John, Cena]|
|  4|        Kim Joe|        [Kim, Joe]|
+---+---------------+------------------+



#### Dynamic Approach

In [0]:
from pyspark.sql.functions import col

# Output column names, listed in the same order as the elements of the
# New_Name array (index 0 -> First_Name, index 1 -> Last_Name).
new_name_columns = ["First_Name", "Last_Name"]

# Start from df_1, which still carries the split New_Name array column.
df_f = df_1

# Add one column per listed name, taking the array element at the same index.
for position, target_column in enumerate(new_name_columns):
    df_f = df_f.withColumn(target_column, col("New_Name").getItem(position))

# Remove the intermediate array and the original Name column.
df_f = df_f.drop("New_Name", "Name")

# Display the result.
df_f.show()

+---+----------+---------+
| ID|First_Name|Last_Name|
+---+----------+---------+
|  1|     Sagar|Prajapati|
|  2|      Alex|     John|
|  3|      John|     Cena|
|  4|       Kim|      Joe|
+---+----------+---------+

