In [0]:
from pyspark.sql import Row

In [0]:
import datetime

# Sample user records for the demo DataFrame.
# `phone_numbers` is a nested Row (mobile/home) so that Spark infers a struct
# column; user 4 has no phone data at all and user 5 has no home number, which
# exercises null handling at both the struct level and the field level.
users = [
    dict(
        id=1,
        first_name="Corrie",
        last_name="Van den Oord",
        email="cvandenoor@etsy.com",
        phone_numbers=Row(mobile="+91 8645879087", home="+91 9878673289"),
        is_customer=True,
        amount_paid=1000.55,
        customer_from=datetime.date(2021, 1, 15),
        last_updated_ts=datetime.datetime(2021, 2, 10, 1, 15, 0),
    ),
    dict(
        id=2,
        first_name="John",
        last_name="Cena",
        email="john@cena.com",
        phone_numbers=Row(mobile="+91 9886879087", home="+91 9134673289"),
        is_customer=True,
        amount_paid=900.0,
        customer_from=datetime.date(2022, 5, 15),
        last_updated_ts=datetime.datetime(2024, 3, 15, 1, 16, 0),
    ),
    dict(
        id=3,
        first_name="James",
        last_name="Bond",
        email="james@bond.com",
        phone_numbers=Row(mobile="+91 3245879087", home="+91 9854673289"),
        is_customer=False,
        amount_paid=750.60,
        customer_from=datetime.date(2023, 1, 12),
        last_updated_ts=datetime.datetime(2018, 5, 5, 5, 17, 2),
    ),
    dict(
        id=4,
        first_name="Robert",
        last_name="Dowrey",
        email="robert@dowrey.com",
        phone_numbers=None,  # whole struct missing
        is_customer=True,
        amount_paid=None,
        customer_from=None,
        last_updated_ts=datetime.datetime(2019, 4, 3, 8, 14, 8),
    ),
    dict(
        id=5,
        first_name="Chris",
        last_name="Hemmsworth",
        email="chris@hemmsworth.com",
        phone_numbers=Row(mobile="+91 9085879087", home=None),  # only one field missing
        is_customer=False,
        amount_paid=None,
        customer_from=None,
        last_updated_ts=datetime.datetime(2019, 4, 3, 8, 14, 8),
    ),
]

In [0]:
# Turn every user dict into a Row (dict keys become field names) and let Spark
# infer the schema from the resulting rows.
users_df = spark.createDataFrame(
    [Row(**record) for record in users]
)

In [0]:
# Preview the DataFrame with show()'s defaults spelled out: up to 20 rows, and
# long cell values are cut short (the struct column is rendered with "...").
users_df.show(n=20, truncate=True)

+---+----------+------------+--------------------+--------------------+-----------+-----------+-------------+-------------------+
| id|first_name|   last_name|               email|       phone_numbers|is_customer|amount_paid|customer_from|    last_updated_ts|
+---+----------+------------+--------------------+--------------------+-----------+-----------+-------------+-------------------+
|  1|    Corrie|Van den Oord| cvandenoor@etsy.com|{+91 8645879087, ...|       true|    1000.55|   2021-01-15|2021-02-10 01:15:00|
|  2|      John|        Cena|       john@cena.com|{+91 9886879087, ...|       true|      900.0|   2022-05-15|2024-03-15 01:16:00|
|  3|     James|        Bond|      james@bond.com|{+91 3245879087, ...|      false|      750.6|   2023-01-12|2018-05-05 05:17:02|
|  4|    Robert|      Dowrey|   robert@dowrey.com|                null|       true|       null|         null|2019-04-03 08:14:08|
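|  5|     Chris|  Hemmsworth|chris@hemmsworth.com|{+91 9085879087, ...|      false|       null|         null|2019-04-03 08:14:08|
+---+----------+------------+--------------------+--------------------+-----------+-----------+-------------+-------------------+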

In [0]:
# Show the struct column in full (no truncation) to see both nested values.
users_df.select("id", "phone_numbers").show(truncate=False)

+---+--------------------------------+
|id |phone_numbers                   |
+---+--------------------------------+
|1  |{+91 8645879087, +91 9878673289}|
|2  |{+91 9886879087, +91 9134673289}|
|3  |{+91 3245879087, +91 9854673289}|
|4  |null                            |
|5  |{+91 9085879087, null}          |
+---+--------------------------------+



In [0]:
# Inspect the inferred column types as (name, type-string) pairs; the nested
# Row shows up as struct<mobile:string,home:string>.
users_df.dtypes

Out[9]: [('id', 'bigint'),
 ('first_name', 'string'),
 ('last_name', 'string'),
 ('email', 'string'),
 ('phone_numbers', 'struct<mobile:string,home:string>'),
 ('is_customer', 'boolean'),
 ('amount_paid', 'double'),
 ('customer_from', 'date'),
 ('last_updated_ts', 'timestamp')]

In [0]:
# Reach into the struct with dotted string paths; the resulting columns are
# named after the leaf fields (mobile, home). A null struct yields null fields.
users_df.select(
    "id",
    "phone_numbers.mobile",
    "phone_numbers.home",
).show()

+---+--------------+--------------+
| id|        mobile|          home|
+---+--------------+--------------+
|  1|+91 8645879087|+91 9878673289|
|  2|+91 9886879087|+91 9134673289|
|  3|+91 3245879087|+91 9854673289|
|  4|          null|          null|
|  5|+91 9085879087|          null|
+---+--------------+--------------+



In [0]:
from pyspark.sql.functions import col

In [0]:
# Same field extraction using Column objects and bracket access; here the
# output columns keep the qualified names (phone_numbers.mobile / .home).
users_df.select(
    "id",
    col("phone_numbers")["mobile"],
    col("phone_numbers")["home"],
).show()

+---+--------------------+------------------+
| id|phone_numbers.mobile|phone_numbers.home|
+---+--------------------+------------------+
|  1|      +91 8645879087|    +91 9878673289|
|  2|      +91 9886879087|    +91 9134673289|
|  3|      +91 3245879087|    +91 9854673289|
|  4|                null|              null|
|  5|      +91 9085879087|              null|
+---+--------------------+------------------+



In [0]:
# Expand every field of the struct into its own top-level column with ".*".
users_df.select("id", "phone_numbers.*").show()

+---+--------------+--------------+
| id|        mobile|          home|
+---+--------------+--------------+
|  1|+91 8645879087|+91 9878673289|
|  2|+91 9886879087|+91 9134673289|
|  3|+91 3245879087|+91 9854673289|
|  4|          null|          null|
|  5|+91 9085879087|          null|
+---+--------------+--------------+

