In [1]:
from pyspark.sql import SparkSession
# Start the Spark session used by every cell below (getOrCreate reuses an
# already-running session instead of building a second one).
spark = (
    SparkSession.builder
    .appName("cast_like_alias_sort")
    .getOrCreate()
)

In [2]:
from pyspark.sql.functions import col
# Two-row demo dataset of (name, age) tuples.
data = [("John", 30), ("Jane", 25)]
df = spark.createDataFrame(data, schema=["name", "age"])

# Project only 'age' and expose it under the name 'years'. The result is a new
# one-column DataFrame; `df` itself still has its 'name' and 'age' columns.
years_column = col("age").alias("years")
df_with_alias = df.select(years_column)

df_with_alias.show()

+-----+
|years|
+-----+
|   30|
|   25|
+-----+



In [3]:
# Sort key shared by both orderings below.
age_column = col("age")

# Ascending: smallest 'age' first.
df_sorted_asc = df.orderBy(age_column.asc())
df_sorted_asc.show()

# Descending: largest 'age' first.
df_sorted_desc = df.orderBy(age_column.desc())
df_sorted_desc.show()


+----+---+
|name|age|
+----+---+
|Jane| 25|
|John| 30|
+----+---+

+----+---+
|name|age|
+----+---+
|John| 30|
|Jane| 25|
+----+---+



In [4]:
# Replace 'age' with a string-typed version of itself. Because withColumn is
# given the existing column name, the column is overwritten rather than added.
# NOTE(review): the source type is whatever createDataFrame inferred from the
# Python ints -- normally LongType rather than IntegerType; confirm with
# df.printSchema().
age_as_string = col("age").cast("string")
df_casted = df.withColumn("age", age_as_string)

# The schema should now report 'age' as string.
df_casted.printSchema()
df_casted.show()


root
 |-- name: string (nullable = true)
 |-- age: string (nullable = true)

+----+---+
|name|age|
+----+---+
|John| 30|
|Jane| 25|
+----+---+



In [5]:
# Keep only rows whose 'name' starts with "Ja". In a LIKE pattern `%` matches
# any run of characters (including none), so "Ja%" selects Jane but not John;
# a pattern of "J%" would match both sample rows.
df_filtered = df.filter(col("name").like("Ja%"))

df_filtered.show()


+----+---+
|name|age|
+----+---+
|Jane| 25|
+----+---+



In [7]:
# Exact-match filter on 'name'. Note that this rebinds `df_filtered`, replacing
# the LIKE-based result assigned to the same name in an earlier cell.
is_john = col("name") == "John"
df_filtered = df.filter(is_john)

df_filtered.show()

+----+---+
|name|age|
+----+---+
|John| 30|
+----+---+



In [8]:
# Exact-match condition on 'name', applied through where() rather than filter().
name_matches = col("name") == "John"
df_where = df.where(name_matches)

df_where.show()

+----+---+
|name|age|
+----+---+
|John| 30|
+----+---+



In [9]:
# Build each comparison on its own, then AND them together with `&` so a row
# must satisfy both conditions to be kept.
name_is_john = col("name") == "John"
age_is_30 = col("age") == 30
df_where_multiple = df.where(name_is_john & age_is_30)

df_where_multiple.show()

+----+---+
|name|age|
+----+---+
|John| 30|
+----+---+

