In [0]:
DataFrame.withColumn(colName, col)
Returns a new DataFrame by adding a column or replacing the existing column that has the same name.

Parameters:
colName : str
string, name of the new column.

col : Column
a Column expression for the new column.

In [0]:
import pyspark
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, lit
from pyspark.sql.types import StructType, StructField, StringType,IntegerType

# Obtain a Spark session registered under this application name
# (getOrCreate returns an already-active session when one exists).
spark = (
    SparkSession.builder
    .appName('SparkByExamples.com')
    .getOrCreate()
)

# Sample rows. Each tuple lines up positionally with `columns` below:
# (firstname, middlename, lastname, dob, gender, salary).
# Some name parts are empty strings, and dob is kept as a plain string.
data = [
    ('James',   '',     'Smith',    '1991-04-01', 'M', 3000),
    ('Michael', 'Rose', '',         '2000-05-19', 'M', 4000),
    ('Robert',  '',     'Williams', '1978-09-05', 'M', 4000),
    ('Maria',   'Anne', 'Jones',    '1967-12-01', 'F', 4000),
    ('Jen',     'Mary', 'Brown',    '1980-02-17', 'F', -1),
]

# Column names, in the same order as the fields of each tuple in `data`.
columns = [
    "firstname",
    "middlename",
    "lastname",
    "dob",
    "gender",
    "salary",
]
# Build the base DataFrame from the in-memory rows. Only column names are
# supplied as the schema, so the column types come from the data itself.
df = spark.createDataFrame(data, schema=columns)

# Show the resulting schema, then every row without truncating cell values.
df.printSchema()
df.show(truncate=False)


root
 |-- firstname: string (nullable = true)
 |-- middlename: string (nullable = true)
 |-- lastname: string (nullable = true)
 |-- dob: string (nullable = true)
 |-- gender: string (nullable = true)
 |-- salary: long (nullable = true)

+---------+----------+--------+----------+------+------+
|firstname|middlename|lastname|dob       |gender|salary|
+---------+----------+--------+----------+------+------+
|James    |          |Smith   |1991-04-01|M     |3000  |
|Michael  |Rose      |        |2000-05-19|M     |4000  |
|Robert   |          |Williams|1978-09-05|M     |4000  |
|Maria    |Anne      |Jones   |1967-12-01|F     |4000  |
|Jen      |Mary      |Brown   |1980-02-17|F     |-1    |
+---------+----------+--------+----------+------+------+



In [0]:
# withColumn with an existing name replaces that column: here "salary" is
# swapped for the same values cast to integer. `df` itself is not modified.
df2 = df.withColumn(
    "salary",
    df["salary"].cast("Integer"),
)

df2.printSchema()
df2.show(truncate=False)

root
 |-- firstname: string (nullable = true)
 |-- middlename: string (nullable = true)
 |-- lastname: string (nullable = true)
 |-- dob: string (nullable = true)
 |-- gender: string (nullable = true)
 |-- salary: integer (nullable = true)

+---------+----------+--------+----------+------+------+
|firstname|middlename|lastname|dob       |gender|salary|
+---------+----------+--------+----------+------+------+
|James    |          |Smith   |1991-04-01|M     |3000  |
|Michael  |Rose      |        |2000-05-19|M     |4000  |
|Robert   |          |Williams|1978-09-05|M     |4000  |
|Maria    |Anne      |Jones   |1967-12-01|F     |4000  |
|Jen      |Mary      |Brown   |1980-02-17|F     |-1    |
+---------+----------+--------+----------+------+------+



In [0]:
# Replace "salary" with its value multiplied by 100 in a new DataFrame.
df3 = df.withColumn(
    "salary",
    df["salary"] * 100,
)

df3.printSchema()
df3.show(truncate=False)

root
 |-- firstname: string (nullable = true)
 |-- middlename: string (nullable = true)
 |-- lastname: string (nullable = true)
 |-- dob: string (nullable = true)
 |-- gender: string (nullable = true)
 |-- salary: long (nullable = true)

+---------+----------+--------+----------+------+------+
|firstname|middlename|lastname|dob       |gender|salary|
+---------+----------+--------+----------+------+------+
|James    |          |Smith   |1991-04-01|M     |300000|
|Michael  |Rose      |        |2000-05-19|M     |400000|
|Robert   |          |Williams|1978-09-05|M     |400000|
|Maria    |Anne      |Jones   |1967-12-01|F     |400000|
|Jen      |Mary      |Brown   |1980-02-17|F     |-100  |
+---------+----------+--------+----------+------+------+



In [0]:

# Add a new column derived from an existing one. Despite its name,
# "CopiedColumn" holds salary multiplied by -1, not a verbatim copy.
df4 = df.withColumn(
    "CopiedColumn",
    df["salary"] * -1,
)

df4.printSchema()
df4.show(truncate=False)

root
 |-- firstname: string (nullable = true)
 |-- middlename: string (nullable = true)
 |-- lastname: string (nullable = true)
 |-- dob: string (nullable = true)
 |-- gender: string (nullable = true)
 |-- salary: long (nullable = true)
 |-- CopiedColumn: long (nullable = true)

+---------+----------+--------+----------+------+------+------------+
|firstname|middlename|lastname|dob       |gender|salary|CopiedColumn|
+---------+----------+--------+----------+------+------+------------+
|James    |          |Smith   |1991-04-01|M     |3000  |-3000       |
|Michael  |Rose      |        |2000-05-19|M     |4000  |-4000       |
|Robert   |          |Williams|1978-09-05|M     |4000  |-4000       |
|Maria    |Anne      |Jones   |1967-12-01|F     |4000  |-4000       |
|Jen      |Mary      |Brown   |1980-02-17|F     |-1    |1           |
+---------+----------+--------+----------+------+------+------------+



In [0]:
# Add a constant column: lit() wraps the Python value "USA" as a Column
# expression, so every row receives the same value.
df5 = df.withColumn(
    "Country",
    lit("USA"),
)

df5.printSchema()
df5.show(truncate=False)

root
 |-- firstname: string (nullable = true)
 |-- middlename: string (nullable = true)
 |-- lastname: string (nullable = true)
 |-- dob: string (nullable = true)
 |-- gender: string (nullable = true)
 |-- salary: long (nullable = true)
 |-- Country: string (nullable = false)

+---------+----------+--------+----------+------+------+-------+
|firstname|middlename|lastname|dob       |gender|salary|Country|
+---------+----------+--------+----------+------+------+-------+
|James    |          |Smith   |1991-04-01|M     |3000  |USA    |
|Michael  |Rose      |        |2000-05-19|M     |4000  |USA    |
|Robert   |          |Williams|1978-09-05|M     |4000  |USA    |
|Maria    |Anne      |Jones   |1967-12-01|F     |4000  |USA    |
|Jen      |Mary      |Brown   |1980-02-17|F     |-1    |USA    |
+---------+----------+--------+----------+------+------+-------+



In [0]:
# Chain two withColumn calls to append two constant columns in one expression.
df6 = (
    df
    .withColumn("Country", lit("USA"))
    .withColumn("anotherColumn", lit("anotherValue"))
)

df6.printSchema()
df6.show(truncate=False)

root
 |-- firstname: string (nullable = true)
 |-- middlename: string (nullable = true)
 |-- lastname: string (nullable = true)
 |-- dob: string (nullable = true)
 |-- gender: string (nullable = true)
 |-- salary: long (nullable = true)
 |-- Country: string (nullable = false)
 |-- anotherColumn: string (nullable = false)

+---------+----------+--------+----------+------+------+-------+-------------+
|firstname|middlename|lastname|dob       |gender|salary|Country|anotherColumn|
+---------+----------+--------+----------+------+------+-------+-------------+
|James    |          |Smith   |1991-04-01|M     |3000  |USA    |anotherValue |
|Michael  |Rose      |        |2000-05-19|M     |4000  |USA    |anotherValue |
|Robert   |          |Williams|1978-09-05|M     |4000  |USA    |anotherValue |
|Maria    |Anne      |Jones   |1967-12-01|F     |4000  |USA    |anotherValue |
|Jen      |Mary      |Brown   |1980-02-17|F     |-1    |USA    |anotherValue |
+---------+----------+--------+----------+------+------+-------+-------------+

In [0]:

# Rename "gender" to "sex" and display the result. The renamed DataFrame is
# not assigned to a name, so `df` keeps its original column.
(
    df
    .withColumnRenamed("gender", "sex")
    .show(truncate=False)
)

+---------+----------+--------+----------+---+------+
|firstname|middlename|lastname|dob       |sex|salary|
+---------+----------+--------+----------+---+------+
|James    |          |Smith   |1991-04-01|M  |3000  |
|Michael  |Rose      |        |2000-05-19|M  |4000  |
|Robert   |          |Williams|1978-09-05|M  |4000  |
|Maria    |Anne      |Jones   |1967-12-01|F  |4000  |
|Jen      |Mary      |Brown   |1980-02-17|F  |-1    |
+---------+----------+--------+----------+---+------+



In [0]:
# Display df4 with "CopiedColumn" removed. The result is only shown, not
# stored, so df4 still carries the column afterwards.
(
    df4
    .drop("CopiedColumn")
    .show(truncate=False)
)

+---------+----------+--------+----------+------+------+
|firstname|middlename|lastname|dob       |gender|salary|
+---------+----------+--------+----------+------+------+
|James    |          |Smith   |1991-04-01|M     |3000  |
|Michael  |Rose      |        |2000-05-19|M     |4000  |
|Robert   |          |Williams|1978-09-05|M     |4000  |
|Maria    |Anne      |Jones   |1967-12-01|F     |4000  |
|Jen      |Mary      |Brown   |1980-02-17|F     |-1    |
+---------+----------+--------+----------+------+------+



In [0]:
import pyspark
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, lit
from pyspark.sql.types import StructType, StructField, StringType,IntegerType

# Obtain a Spark session registered under this application name
# (getOrCreate returns an already-active session when one exists).
spark = (
    SparkSession.builder
    .appName('SparkByExamples.com')
    .getOrCreate()
)

# Sample rows. Each tuple lines up positionally with `columns` below:
# (firstname, middlename, lastname, dob, gender, salary).
data = [
    ('James',   '',     'Smith',    '1991-04-01', 'M', 3000),
    ('Michael', 'Rose', '',         '2000-05-19', 'M', 4000),
    ('Robert',  '',     'Williams', '1978-09-05', 'M', 4000),
    ('Maria',   'Anne', 'Jones',    '1967-12-01', 'F', 4000),
    ('Jen',     'Mary', 'Brown',    '1980-02-17', 'F', -1),
]

# Column names, in the same order as the fields of each tuple in `data`.
columns = [
    "firstname",
    "middlename",
    "lastname",
    "dob",
    "gender",
    "salary",
]

# Build the base DataFrame, then print its schema and all rows untruncated.
df = spark.createDataFrame(data, schema=columns)
df.printSchema()
df.show(truncate=False)

# Replace "salary" with the same values cast to integer.
df2 = df.withColumn("salary", df["salary"].cast("Integer"))
df2.printSchema()
df2.show(truncate=False)

# Replace "salary" with its value multiplied by 100.
df3 = df.withColumn("salary", df["salary"] * 100)
df3.printSchema()
df3.show(truncate=False)

# Add "CopiedColumn" holding salary multiplied by -1 (schema only is printed).
df4 = df.withColumn("CopiedColumn", df["salary"] * -1)
df4.printSchema()

# Add a constant "Country" column (schema only is printed).
df5 = df.withColumn("Country", lit("USA"))
df5.printSchema()

# Add two constant columns by chaining withColumn (schema only is printed).
df6 = (
    df
    .withColumn("Country", lit("USA"))
    .withColumn("anotherColumn", lit("anotherValue"))
)
df6.printSchema()

# Show `df` with "gender" renamed to "sex"; the result is not stored.
(
    df
    .withColumnRenamed("gender", "sex")
    .show(truncate=False)
)

# Show df4 without "CopiedColumn"; the result is not stored.
(
    df4
    .drop("CopiedColumn")
    .show(truncate=False)
)

# Rows whose first field is a nested 3-tuple of name parts (the values mirror
# the first/middle/last names used in `data`), followed by three more fields.
# Every value here is a string, including the numeric-looking ones, and one
# row has an empty string in the second field.
dataStruct = [
    (("James", "", "Smith"), "36636", "M", "3000"),
    (("Michael", "Rose", ""), "40288", "M", "4000"),
    (("Robert", "", "Williams"), "42114", "M", "4000"),
    (("Maria", "Anne", "Jones"), "39192", "F", "4000"),
    (("Jen", "Mary", "Brown"), "", "F", "-1"),
]

