In [37]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import (
    array,
    array_contains,
    array_position,
    array_remove,
    col,
    expr,
    lit,
    size,
)

# getOrCreate() returns the active session when there is one, otherwise it
# starts a new session under this application name.
spark = (
    SparkSession.builder
    .appName("ArrayFunctionsExample")
    .getOrCreate()
)

# Sample rows: one (id, name) tuple per person.
data = [(1, "Alice"), (2, "Bob"), (3, "Charlie")]

# Column names are supplied explicitly; Spark infers the column types from the rows.
df = spark.createDataFrame(data, schema=["id", "name"])
df.show()


+---+-------+
| id|   name|
+---+-------+
|  1|  Alice|
|  2|    Bob|
|  3|Charlie|
+---+-------+



## array()

In [38]:
# array(): combine several columns into a single array column.
# The two constant elements are built with lit() rather than expr('"hello"'):
# a double-quoted token inside a SQL expression is only read as a string
# literal under the default parser settings, and is resolved as a column
# identifier when ANSI double-quoted identifiers are enabled.
# NOTE: `df` is deliberately rebound here, because the later cells read
# `name_array` from `df`.
df = df.withColumn(
    "name_array",
    array(col("name"), lit("hello"), lit("world")),
)
df.show()

+---+-------+--------------------+
| id|   name|          name_array|
+---+-------+--------------------+
|  1|  Alice|[Alice, hello, wo...|
|  2|    Bob| [Bob, hello, world]|
|  3|Charlie|[Charlie, hello, ...|
+---+-------+--------------------+



## array_contains()

In [39]:
# array_contains(): true when the row's array holds an element equal to "hello".
df_contains = df.withColumn(
    "has_hello",
    array_contains("name_array", "hello"),
)

In [40]:
# show() defaults spelled out: at most 20 rows, and cell text longer than
# 20 characters is cut off (hence the "..." in the name_array column).
df_contains.show(n=20, truncate=True)

+---+-------+--------------------+---------+
| id|   name|          name_array|has_hello|
+---+-------+--------------------+---------+
|  1|  Alice|[Alice, hello, wo...|     true|
|  2|    Bob| [Bob, hello, world]|     true|
|  3|Charlie|[Charlie, hello, ...|     true|
+---+-------+--------------------+---------+



## size() — array length

In [41]:
# size(): number of elements in each row's array.
df_len = df.withColumn(
    "len_of_array",
    size("name_array"),
)

In [42]:
# show() defaults spelled out: at most 20 rows, and cell text longer than
# 20 characters is cut off (hence the "..." in the name_array column).
df_len.show(n=20, truncate=True)

+---+-------+--------------------+------------+
| id|   name|          name_array|len_of_array|
+---+-------+--------------------+------------+
|  1|  Alice|[Alice, hello, wo...|           3|
|  2|    Bob| [Bob, hello, world]|           3|
|  3|Charlie|[Charlie, hello, ...|           3|
+---+-------+--------------------+------------+



## array_position()

In [46]:
# array_position(): 1-based index of the first occurrence of "hello"
# (the function returns 0 when the value is not in the array).
df_position = df.withColumn(
    "pos_hello",
    array_position("name_array", "hello"),
)
df_position.show()

+---+-------+--------------------+---------+
| id|   name|          name_array|pos_hello|
+---+-------+--------------------+---------+
|  1|  Alice|[Alice, hello, wo...|        2|
|  2|    Bob| [Bob, hello, world]|        2|
|  3|Charlie|[Charlie, hello, ...|        2|
+---+-------+--------------------+---------+



## array_remove()

In [47]:
# array_remove(): drop every element equal to "hello" from each row's array.
# The new column is named "array_remove"; name_array itself is left unchanged.
df_remove = df.withColumn(
    "array_remove",
    array_remove("name_array", "hello"),
)
df_remove.show()

+---+-------+--------------------+----------------+
| id|   name|          name_array|    array_remove|
+---+-------+--------------------+----------------+
|  1|  Alice|[Alice, hello, wo...|  [Alice, world]|
|  2|    Bob| [Bob, hello, world]|    [Bob, world]|
|  3|Charlie|[Charlie, hello, ...|[Charlie, world]|
+---+-------+--------------------+----------------+

