<a href="https://colab.research.google.com/github/ArulrajGopal/Spark-Guide/blob/main/13_Structs_withCols.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
!pip install pyspark

from pyspark.sql import SparkSession

#create spark session
spark= SparkSession.builder.appName('mysparksession').getOrCreate()

#create spark context
sc = spark.sparkContext



In [5]:
from pyspark.sql.types import StructType,StructField, StringType, IntegerType
from pyspark.sql.functions import *

In [6]:
# Sample records. Each row is (name, dob, gender, salary), where `name` is a
# nested (firstname, middlename, lastname) tuple that maps onto the struct
# column declared in the schema below.
dataDF = [
    (("James", "", "Smith"), "1991-04-01", "M", 3000),
    (("Michael", "Rose", ""), "2000-05-19", "M", 4000),
    (("Robert", "", "Williams"), "1978-09-05", "M", 4000),
    (("Maria", "Anne", "Jones"), "1967-12-01", "F", 4000),
    (("Jen", "Mary", "Brown"), "1980-02-17", "F", -1),
]

# Schema built incrementally with StructType.add(); every field is nullable.
# `name` is itself a struct with three string fields.
schema = (
    StructType()
    .add(
        "name",
        StructType()
        .add("firstname", StringType(), True)
        .add("middlename", StringType(), True)
        .add("lastname", StringType(), True),
    )
    .add("dob", StringType(), True)
    .add("gender", StringType(), True)
    .add("salary", IntegerType(), True)
)

df = spark.createDataFrame(dataDF, schema)
df.printSchema()


root
 |-- name: struct (nullable = true)
 |    |-- firstname: string (nullable = true)
 |    |-- middlename: string (nullable = true)
 |    |-- lastname: string (nullable = true)
 |-- dob: string (nullable = true)
 |-- gender: string (nullable = true)
 |-- salary: integer (nullable = true)



In [13]:
# Example 1: rename the top-level `dob` column to `DateOfBirth`.
# withColumnRenamed returns a new DataFrame; `df` itself is not modified.
(
    df
    .withColumnRenamed(existing="dob", new="DateOfBirth")
    .show()
)

+--------------------+-----------+------+------+
|                name|DateOfBirth|gender|salary|
+--------------------+-----------+------+------+
|    {James, , Smith}| 1991-04-01|     M|  3000|
|   {Michael, Rose, }| 2000-05-19|     M|  4000|
|{Robert, , Williams}| 1978-09-05|     M|  4000|
|{Maria, Anne, Jones}| 1967-12-01|     F|  4000|
|  {Jen, Mary, Brown}| 1980-02-17|     F|    -1|
+--------------------+-----------+------+------+



In [14]:
# Flatten the nested `name` struct: copy each of its fields into a new
# top-level column, then drop the struct so only the flat columns remain.
(
    df
    .withColumn("fname", col("name.firstname"))   # name.firstname  -> fname
    .withColumn("mname", col("name.middlename"))  # name.middlename -> mname
    .withColumn("lname", col("name.lastname"))    # name.lastname   -> lname
    .drop("name")
    .show()
)

+----------+------+------+-------+-----+--------+
|       dob|gender|salary|  fname|mname|   lname|
+----------+------+------+-------+-----+--------+
|1991-04-01|     M|  3000|  James|     |   Smith|
|2000-05-19|     M|  4000|Michael| Rose|        |
|1978-09-05|     M|  4000| Robert|     |Williams|
|1967-12-01|     F|  4000|  Maria| Anne|   Jones|
|1980-02-17|     F|    -1|    Jen| Mary|   Brown|
+----------+------+------+-------+-----+--------+



In [16]:
# Add a `timestamp_column` holding the current timestamp, and overwrite the
# existing `salary` column with salary / 10.
# The column is referenced by its exact declared name ("salary"): the previous
# "Salary" spelling only resolved because Spark's column resolution is
# case-insensitive by default, and would fail with spark.sql.caseSensitive=true.
(
    df
    .withColumn("timestamp_column", current_timestamp())
    .withColumn("salary", col("salary") / 10)
    .show(truncate=False)  # truncate=False prints the full timestamp value
)

+--------------------+----------+------+------+--------------------------+
|name                |dob       |gender|salary|timestamp_column          |
+--------------------+----------+------+------+--------------------------+
|{James, , Smith}    |1991-04-01|M     |300.0 |2024-01-31 18:57:52.497996|
|{Michael, Rose, }   |2000-05-19|M     |400.0 |2024-01-31 18:57:52.497996|
|{Robert, , Williams}|1978-09-05|M     |400.0 |2024-01-31 18:57:52.497996|
|{Maria, Anne, Jones}|1967-12-01|F     |400.0 |2024-01-31 18:57:52.497996|
|{Jen, Mary, Brown}  |1980-02-17|F     |-0.1  |2024-01-31 18:57:52.497996|
+--------------------+----------+------+------+--------------------------+

