In [0]:
import pyspark
from pyspark.sql import SparkSession

# Get the active SparkSession, or create one, under this notebook's app name.
spark = (
    SparkSession.builder
    .appName('SparkByExamples.com')
    .getOrCreate()
)

# Sample rows, one tuple per row, in the same order as `columns` below.
# jobStartDate and isGraduated are supplied as plain strings; age is an int
# and salary a float.
# NOTE(review): the last row's date is written 06-01-1992, unlike the
# yyyy-MM-dd form of the other rows -- confirm the DateType cast applied
# further down handles this value as intended.
simpleData = [
    ("James", 34, "2006-01-01", "true", "M", 3000.60),
    ("Michael", 33, "1980-01-10", "true", "F", 3300.80),
    ("Robert", 37, "06-01-1992", "false", "M", 5000.50),
]

columns = [
    "firstname",
    "age",
    "jobStartDate",
    "isGraduated",
    "gender",
    "salary",
]

# Only column names are given, so the column types come from the data itself.
df = spark.createDataFrame(simpleData, columns)

# Show the starting schema and the full, untruncated rows.
df.printSchema()
df.show(truncate=False)

from pyspark.sql.functions import col
from pyspark.sql.types import StringType,BooleanType,DateType
# Approach 1: Column.cast() via withColumn(). Re-using an existing column name
# replaces that column, so df2 keeps all six columns with three of them re-typed:
#   age -> string, isGraduated -> boolean, jobStartDate -> date
df2 = (
    df
    .withColumn("age", col("age").cast(StringType()))
    .withColumn("isGraduated", col("isGraduated").cast(BooleanType()))
    .withColumn("jobStartDate", col("jobStartDate").cast(DateType()))
)

# Only the schema is printed here; no rows are displayed for df2.
df2.printSchema()

# Approach 2: SQL cast expressions through selectExpr(). Each expression is
# aliased back to its source column name. Only the three listed columns are
# selected, so df3 has just age (int), isGraduated (string) and
# jobStartDate (string).
df3 = df2.selectExpr(
    "cast(age as int) age",
    "cast(isGraduated as string) isGraduated",
    "cast(jobStartDate as string) jobStartDate",
)

# Show the resulting schema and the full, untruncated rows.
df3.printSchema()
df3.show(truncate=False)

# Approach 3: plain SQL. Register df3 as a temporary view so it can be queried
# by name, then convert the columns with the STRING(), BOOLEAN() and DATE()
# functions inside the query.
df3.createOrReplaceTempView("CastExample")

df4 = spark.sql(
    "SELECT STRING(age),BOOLEAN(isGraduated),DATE(jobStartDate) "
    "from CastExample"
)

# Show the resulting schema and the full, untruncated rows.
df4.printSchema()
df4.show(truncate=False)

root
 |-- firstname: string (nullable = true)
 |-- age: long (nullable = true)
 |-- jobStartDate: string (nullable = true)
 |-- isGraduated: string (nullable = true)
 |-- gender: string (nullable = true)
 |-- salary: double (nullable = true)

+---------+---+------------+-----------+------+------+
|firstname|age|jobStartDate|isGraduated|gender|salary|
+---------+---+------------+-----------+------+------+
|James    |34 |2006-01-01  |true       |M     |3000.6|
|Michael  |33 |1980-01-10  |true       |F     |3300.8|
|Robert   |37 |06-01-1992  |false      |M     |5000.5|
+---------+---+------------+-----------+------+------+

root
 |-- firstname: string (nullable = true)
 |-- age: string (nullable = true)
 |-- jobStartDate: date (nullable = true)
 |-- isGraduated: boolean (nullable = true)
 |-- gender: string (nullable = true)
 |-- salary: double (nullable = true)

root
 |-- age: integer (nullable = true)
 |-- isGraduated: string (nullable = true)
 |-- jobStartDate: string (nullable = true)

In [0]:
#Import the pyspark module and the SparkSession class from pyspark.sql.
#Create a SparkSession named 'SparkByExamples.com' using SparkSession.builder.appName('SparkByExamples.com').getOrCreate().
#Define the simple data as a list of tuples, where each tuple represents a row in the DataFrame.
#Define the column names for the DataFrame.
#Create the DataFrame df using the provided data and schema using spark.createDataFrame(data=simpleData, schema=columns).
#Print the schema of the DataFrame using the printSchema() method.
#Display the contents of the DataFrame using the show() method.
#Import the necessary functions and data types from pyspark.sql.functions and pyspark.sql.types.
#Use the withColumn() method together with cast() to convert column types: age to string, isGraduated to boolean, and jobStartDate to date. The result is stored in df2.
#Print the schema of the updated DataFrame df2 using the printSchema() method.
#Use selectExpr() with SQL cast expressions to convert age to int and isGraduated and jobStartDate to string, creating the DataFrame df3, which contains only these three columns.
#Print the schema of the DataFrame df3 using the printSchema() method.
#Display the contents of the DataFrame df3 using the show() method.
#Create a temporary view named "CastExample" for the DataFrame df3 using createOrReplaceTempView().
#Use spark.sql() to run a SQL query that casts age to string, isGraduated to boolean, and jobStartDate to date using the STRING(), BOOLEAN(), and DATE() functions, and assign the result to df4.
#Print the schema of the DataFrame df4 using the printSchema() method.
#Display the contents of the DataFrame df4 using the show() method.
#This code showcases how to use casting operations in PySpark to convert data types of columns in a DataFrame. It demonstrates various casting techniques such as using the withColumn() method, selectExpr(), and running SQL queries.