In [4]:
import findspark
# Locate the local Spark installation and make `pyspark` importable as a
# regular library; this runs before the pyspark imports in the cells below.
findspark.init()

<b>Creating a SparkSession</b>
<ul>
<li>SparkSession.builder: This is the starting point for creating a SparkSession. It allows you to configure various options.</li>
<li>.appName('SpSQL'): Sets the name of the Spark application (which appears in the Spark UI). In this case, the name is set to 'SpSQL'.</li>
<li>.getOrCreate(): Returns the existing SparkSession if one is already running in this process (regardless of its application name); otherwise, it creates a new session using the options set on the builder.</li>
</ul>
<p>The resulting <code>sc</code> object represents the Spark session (a <code>SparkSession</code>, not a <code>SparkContext</code>), and you can use it to perform various Spark operations.</p>

In [5]:
from pyspark.sql import SparkSession

In [11]:
# Reuse the running Spark session if there is one; otherwise start a new one
# whose application name is 'SpSQL'.
sc = (
    SparkSession.builder
    .appName('SpSQL')
    .getOrCreate()
)

In [7]:
from pyspark.sql.types import StructType, StructField
from pyspark.sql.types import IntegerType, StringType, ArrayType

In [8]:
# Sample rows. Each record is laid out as:
#   ((firstname, middlename, lastname), [languages...], state, gender)
# An empty string marks a missing middle or last name.
data = [
    (("James", "", "Smith"), ["Java", "Scala", "C++"], "OH", "M"),
    (("Anna", "Rose", ""), ["Spark", "Java", "C++"], "NY", "F"),
    (("Julia", "", "Williams"), ["CSharp", "VB"], "OH", "F"),
    (("Maria", "Anne", "Jones"), ["CSharp", "VB"], "NY", "M"),
    (("Jen", "Mary", "Brown"), ["CSharp", "VB"], "NY", "M"),
    (("Mike", "Mary", "Williams"), ["Python", "VB"], "OH", "M"),
]


In [10]:
# Explicit schema for the sample rows: a nested `name` struct, an array of
# language strings, and two plain string columns. Every field is nullable.
schema = StructType([
    StructField(
        'name',
        # The three name parts share the same type and nullability.
        StructType([
            StructField(part, StringType(), True)
            for part in ('firstname', 'middlename', 'lastname')
        ]),
        # nullable is left at its default here (printSchema reports it as true)
    ),
    StructField('languages', ArrayType(StringType()), True),
    StructField('state', StringType(), True),
    StructField('gender', StringType(), True),
])

In [15]:
# Build the DataFrame from the in-memory rows, applying the explicit schema
# instead of letting Spark infer column names and types.
df = sc.createDataFrame(data, schema=schema)

# Print the column tree first, then all rows with untruncated cell values.
df.printSchema()
df.show(truncate=False)

root
 |-- name: struct (nullable = true)
 |    |-- firstname: string (nullable = true)
 |    |-- middlename: string (nullable = true)
 |    |-- lastname: string (nullable = true)
 |-- languages: array (nullable = true)
 |    |-- element: string (containsNull = true)
 |-- state: string (nullable = true)
 |-- gender: string (nullable = true)

+----------------------+------------------+-----+------+
|name                  |languages         |state|gender|
+----------------------+------------------+-----+------+
|{James, , Smith}      |[Java, Scala, C++]|OH   |M     |
|{Anna, Rose, }        |[Spark, Java, C++]|NY   |F     |
|{Julia, , Williams}   |[CSharp, VB]      |OH   |F     |
|{Maria, Anne, Jones}  |[CSharp, VB]      |NY   |M     |
|{Jen, Mary, Brown}    |[CSharp, VB]      |NY   |M     |
|{Mike, Mary, Williams}|[Python, VB]      |OH   |M     |
+----------------------+------------------+-----+------+

