In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.types import DataType
from pyspark.sql.types import StructType, StructField, StringType, ArrayType, IntegerType
from pyspark.sql.types import MapType

# Obtain the SparkSession for this notebook via the builder's getOrCreate(),
# registering it under the application name below.
spark = (
    SparkSession.builder
    .appName('SparkByExamples.com')
    .getOrCreate()
)

# ArrayType demo: an array of integers declared with containsNull=False.
# ArrayType and IntegerType are already imported at the top of this cell,
# so no additional import is needed here.
arrayType = ArrayType(IntegerType(), containsNull=False)

# Print the three descriptions of the type that are inspected in this demo.
print(arrayType.jsonValue())     # dict with type / elementType / containsNull
print(arrayType.simpleString())  # compact form, e.g. array<int>
print(arrayType.typeName())      # bare type name


# MapType demo: string keys mapped to integer values. valueContainsNull is
# not passed to the constructor, so whatever default MapType applies is the
# value printed by the last line below.
mapType = MapType(StringType(), IntegerType())

# Inspect the key type, the value type, and the value-nullability flag.
print(mapType.keyType)
print(mapType.valueType)
print(mapType.valueContainsNull)

# Sample rows in column order:
# (firstname, middlename, lastname, age, gender, salary).
# Age is carried as a string. NOTE(review): the empty strings and the -1
# salary look like placeholders for missing values -- confirm the intent.
data = [
    ("James", "", "Smith", "36", "M", 3000),
    ("Michael", "Rose", "", "40", "M", 4000),
    ("Robert", "", "Williams", "42", "M", 4000),
    ("Maria", "Anne", "Jones", "39", "F", 4000),
    ("Jen", "Mary", "Brown", "", "F", -1),
]

# Explicit six-column schema, built one field at a time; every column is
# declared nullable, and only salary is an integer.
schema = (
    StructType()
    .add("firstname", StringType(), True)
    .add("middlename", StringType(), True)
    .add("lastname", StringType(), True)
    .add("age", StringType(), True)
    .add("gender", StringType(), True)
    .add("salary", IntegerType(), True)
)

# Materialise the rows under that schema, then show the schema tree and
# the full (untruncated) table contents.
df = spark.createDataFrame(data, schema=schema)
df.printSchema()
df.show(truncate=False)


{'type': 'array', 'elementType': 'integer', 'containsNull': False}
array<int>
array
StringType()
IntegerType()
True
root
 |-- firstname: string (nullable = true)
 |-- middlename: string (nullable = true)
 |-- lastname: string (nullable = true)
 |-- age: string (nullable = true)
 |-- gender: string (nullable = true)
 |-- salary: integer (nullable = true)

+---------+----------+--------+---+------+------+
|firstname|middlename|lastname|age|gender|salary|
+---------+----------+--------+---+------+------+
|James    |          |Smith   |36 |M     |3000  |
|Michael  |Rose      |        |40 |M     |4000  |
|Robert   |          |Williams|42 |M     |4000  |
|Maria    |Anne      |Jones   |39 |F     |4000  |
|Jen      |Mary      |Brown   |   |F     |-1    |
+---------+----------+--------+---+------+------+



In [0]:
#The code imports necessary modules, including SparkSession from pyspark.sql and data types from pyspark.sql.types.

#A SparkSession is created with the application name set to 'SparkByExamples.com'.

#Operations on the ArrayType:

#An ArrayType of IntegerType is created using the ArrayType constructor with IntegerType() as the element type and containsNull set to False. The jsonValue(), simpleString(), and typeName() methods are then used to print information about the ArrayType.
#Operations on the MapType:

#A MapType with StringType keys and IntegerType values is created using the MapType constructor with StringType() as the key type and IntegerType() as the value type. The keyType, valueType, and valueContainsNull properties of the MapType are then printed; valueContainsNull is True here because the constructor call does not override it.
#Data is created as a list of tuples representing individuals' information.

#A StructType schema is defined with multiple StructField elements representing the fields in the data.

#A DataFrame df is created with the given data and schema.

#The schema of the DataFrame is displayed using the printSchema() method.

#The data in the DataFrame is displayed using the show() method.

#Overall, this code demonstrates how to create and use Spark data types such as ArrayType, MapType, and StructType, along with field types such as StringType and IntegerType. It shows how to retrieve information about these data types and how to define a schema for a DataFrame.
