In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.types import StringType, ArrayType,StructType,StructField
# Get the notebook's SparkSession (an existing one is reused, otherwise a new
# one is created) registered under the application name below.
spark = (
    SparkSession.builder
    .appName('SparkByExamples.com')
    .getOrCreate()
)


# Stand-alone array-of-strings type object; nothing else in this cell refers
# to it, but the name is kept defined.
arrayCol = ArrayType(StringType(), False)

# Sample rows, in schema order:
# (name, languagesAtSchool, languagesAtWork, currentState, previousState)
data = [
    ("James,,Smith", ["Java", "Scala", "C++"], ["Spark", "Java"], "OH", "CA"),
    ("Michael,Rose,", ["Spark", "Java", "C++"], ["Spark", "Java"], "NY", "NJ"),
    ("Robert,,Williams", ["CSharp", "VB"], ["Spark", "Python"], "UT", "NV"),
]

# Every column is declared nullable; the two "languages" columns are arrays
# of strings, the remaining columns are plain strings.
schema = StructType([
    StructField(column_name, column_type, True)
    for column_name, column_type in (
        ("name", StringType()),
        ("languagesAtSchool", ArrayType(StringType())),
        ("languagesAtWork", ArrayType(StringType())),
        ("currentState", StringType()),
        ("previousState", StringType()),
    )
])

# Build the DataFrame from the rows and the explicit schema, then print its
# structure followed by its contents.
df = spark.createDataFrame(data, schema)
df.printSchema()
df.show()

from pyspark.sql.functions import array, array_contains, explode, split

# explode: emit one row per element of languagesAtSchool, next to the name.
df.select(
    df["name"],
    explode(df["languagesAtSchool"]),
).show()

# split: turn the comma-separated name string into an array column.
df.select(
    split(df["name"], ",").alias("nameAsArray"),
).show()

# array: combine the two state columns into a single array column.
df.select(
    df["name"],
    array(df["currentState"], df["previousState"]).alias("States"),
).show()

# array_contains: flag the rows whose languagesAtSchool array includes "Java".
df.select(
    df["name"],
    array_contains(df["languagesAtSchool"], "Java").alias("array_contains"),
).show()

root
 |-- name: string (nullable = true)
 |-- languagesAtSchool: array (nullable = true)
 |    |-- element: string (containsNull = true)
 |-- languagesAtWork: array (nullable = true)
 |    |-- element: string (containsNull = true)
 |-- currentState: string (nullable = true)
 |-- previousState: string (nullable = true)

+----------------+------------------+---------------+------------+-------------+
|            name| languagesAtSchool|languagesAtWork|currentState|previousState|
+----------------+------------------+---------------+------------+-------------+
|    James,,Smith|[Java, Scala, C++]|  [Spark, Java]|          OH|           CA|
|   Michael,Rose,|[Spark, Java, C++]|  [Spark, Java]|          NY|           NJ|
|Robert,,Williams|      [CSharp, VB]|[Spark, Python]|          UT|           NV|
+----------------+------------------+---------------+------------+-------------+

+----------------+------+
|            name|   col|
+----------------+------+
|    James,,Smith|  Java|
|    Ja

In [0]:
#Import SparkSession and the required type classes (StringType, ArrayType, StructType, StructField) from the pyspark.sql package.
#Create a SparkSession named 'SparkByExamples.com'.
#Define the data and schema for the DataFrame. The schema defines the column names, types, and whether they can be null.
#Create the DataFrame df using the provided data and schema.
#Print the schema of the DataFrame using the printSchema() method.
#Display the contents of the DataFrame using the show() method.


In [0]:
#Import the explode function from pyspark.sql.functions.

#Use df.select() with explode(df.languagesAtSchool) to explode the "languagesAtSchool" array column into separate rows while retaining the "name" column. The result is displayed using show().

#Import the split function from pyspark.sql.functions.

#Use df.select() with split(df.name, ",") to split the "name" column by commas and alias the result as "nameAsArray". The result is displayed using show().
#Import the array function from pyspark.sql.functions.

#Use df.select() with array(df.currentState, df.previousState).alias("States") to create a new column called "States" that contains an array of "currentState" and "previousState" values. The "name" column is also retained. The result is displayed using show().

#Import the array_contains function from pyspark.sql.functions.

#Use df.select() with array_contains(df.languagesAtSchool, "Java").alias("array_contains") to check if the "languagesAtSchool" array column contains the value "Java". The "name" column is also retained. The result is displayed using show().

#These operations demonstrate how to work with arrays in a PySpark DataFrame, such as exploding arrays, splitting strings into arrays, creating new arrays, and checking array membership.