In [0]:
import pyspark
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, concat_ws

# Reuse an already-running session if one exists; otherwise start a local
# one with a single worker thread.
spark = (
    SparkSession.builder
    .master("local[1]")
    .appName("SparkByExamples.com")
    .getOrCreate()
)

# Sample rows: a name string, an array of language names, and a state value.
columns = ["name", "languagesAtSchool", "currentState"]
data = [
    ("James,,Smith", ["Java", "Scala", "C++"], "CA"),
    ("Michael,Rose,", ["Spark", "Java", "C++"], "NJ"),
    ("Robert,,Williams", ["CSharp", "VB"], "NV"),
]

df = spark.createDataFrame(data=data, schema=columns)
df.printSchema()
df.show(truncate=False)

# DataFrame API: overwrite the array column with its elements joined by ",".
languages_joined = concat_ws(",", col("languagesAtSchool"))
df2 = df.withColumn("languagesAtSchool", languages_joined)
df2.printSchema()
df2.show(truncate=False)

# Spark SQL: the same join, expressed as a query over a temp view of the
# original (array-typed) DataFrame.
df.createOrReplaceTempView("ARRAY_STRING")
spark.sql(
    "select name, concat_ws(',',languagesAtSchool) as languagesAtSchool,currentState from ARRAY_STRING"
).show(truncate=False)

root
 |-- name: string (nullable = true)
 |-- languagesAtSchool: array (nullable = true)
 |    |-- element: string (containsNull = true)
 |-- currentState: string (nullable = true)

+----------------+------------------+------------+
|name            |languagesAtSchool |currentState|
+----------------+------------------+------------+
|James,,Smith    |[Java, Scala, C++]|CA          |
|Michael,Rose,   |[Spark, Java, C++]|NJ          |
|Robert,,Williams|[CSharp, VB]      |NV          |
+----------------+------------------+------------+

root
 |-- name: string (nullable = true)
 |-- languagesAtSchool: string (nullable = false)
 |-- currentState: string (nullable = true)

+----------------+-----------------+------------+
|name            |languagesAtSchool|currentState|
+----------------+-----------------+------------+
|James,,Smith    |Java,Scala,C++   |CA          |
|Michael,Rose,   |Spark,Java,C++   |NJ          |
|Robert,,Williams|CSharp,VB        |NV          |
+----------------+-----------------+------------+

In [0]:
#Import the necessary modules from the pyspark.sql package.
#Create (or reuse) a local SparkSession whose application name is 'SparkByExamples.com'.
#Define the column names and sample data for the DataFrame.
#Create the DataFrame df using the provided data and schema.
#Print the schema of the DataFrame using the printSchema() method.
#Display the contents of the DataFrame using the show() method.
#Import the necessary functions (col, concat_ws) from pyspark.sql.functions.
#Use withColumn() to transform the "languagesAtSchool" column by concatenating the array elements into a single string separated by commas. Assign the transformed DataFrame to df2.
#Print the schema of df2 using the printSchema() method.
#Display the contents of df2 using the show() method.
#Register df as a temporary view named "ARRAY_STRING" using createOrReplaceTempView().
#Execute a SQL query using Spark SQL to select the "name", transformed "languagesAtSchool", and "currentState" columns from the "ARRAY_STRING" view. The concat_ws() function is used in the SQL query to perform the same transformation as the withColumn() step above. The result is displayed using the show() method.
#The main focus of this code is to showcase two different ways to concatenate array elements into a single string using concat_ws(): one using DataFrame functions and another using Spark SQL.
