In [0]:
import os
import sys
# Point both PySpark interpreter settings at the interpreter running this kernel.
os.environ.update(
    PYSPARK_PYTHON=sys.executable,
    PYSPARK_DRIVER_PYTHON=sys.executable,
)

In [0]:
from pyspark.sql import SparkSession
# Get the active session if there is one, otherwise create one named "Select".
spark = (
    SparkSession.builder
    .appName("Select")
    .getOrCreate()
)

In [0]:
# Column names, listed in the same order as the fields of each row tuple
columns = ["firstname", "lastname", "country", "state"]
# Sample rows: (firstname, lastname, country, state)
data = [
    ("James", "Smith", "USA", "CA"),
    ("Michael", "Rose", "USA", "NY"),
    ("Robert", "Williams", "USA", "CA"),
    ("Maria", "Jones", "USA", "FL"),
]
# Build the DataFrame; the list of names in `columns` is passed as the schema
df = spark.createDataFrame(data, schema=columns)

In [0]:
# truncate=False prints each value in full instead of shortening long strings
df.show(truncate=False)

+---------+--------+-------+-----+
|firstname|lastname|country|state|
+---------+--------+-------+-----+
|James    |Smith   |USA    |CA   |
|Michael  |Rose    |USA    |NY   |
|Robert   |Williams|USA    |CA   |
|Maria    |Jones   |USA    |FL   |
+---------+--------+-------+-----+



#### Selecting single and multiple columns

In [0]:
# Select a single column by passing its name as a plain string
df.select("firstname").show()

+---------+
|firstname|
+---------+
|    James|
|  Michael|
|   Robert|
|    Maria|
+---------+



In [0]:
# Select several columns by referring to them as attributes of the DataFrame
df.select(df.firstname, df.country).show()

+---------+-------+
|firstname|country|
+---------+-------+
|    James|    USA|
|  Michael|    USA|
|   Robert|    USA|
|    Maria|    USA|
+---------+-------+



In [0]:
from pyspark.sql.functions import col
# Select columns through col(), which refers to a column by name
df.select(col("lastname"), col("state")).show()

+--------+-----+
|lastname|state|
+--------+-----+
|   Smith|   CA|
|    Rose|   NY|
|Williams|   CA|
|   Jones|   FL|
+--------+-----+



#### Selecting all columns

In [0]:
# df.columns is the list of every column name; select() accepts that list directly
all_columns = df.columns
df.select(all_columns).show()

+---------+--------+-------+-----+
|firstname|lastname|country|state|
+---------+--------+-------+-----+
|    James|   Smith|    USA|   CA|
|  Michael|    Rose|    USA|   NY|
|   Robert|Williams|    USA|   CA|
|    Maria|   Jones|    USA|   FL|
+---------+--------+-------+-----+



In [0]:
# "*" selects every column
df.select("*").show()

+---------+--------+-------+-----+
|firstname|lastname|country|state|
+---------+--------+-------+-----+
|    James|   Smith|    USA|   CA|
|  Michael|    Rose|    USA|   NY|
|   Robert|Williams|    USA|   CA|
|    Maria|   Jones|    USA|   FL|
+---------+--------+-------+-----+



In [0]:
# Build the list of column names with a comprehension, then select them all
df.select([column_name for column_name in df.columns]).show()

+---------+--------+-------+-----+
|firstname|lastname|country|state|
+---------+--------+-------+-----+
|    James|   Smith|    USA|   CA|
|  Michael|    Rose|    USA|   NY|
|   Robert|Williams|    USA|   CA|
|    Maria|   Jones|    USA|   FL|
+---------+--------+-------+-----+



In [0]:
# Slice the list of names: indexes 1 and 2 are "lastname" and "country"
subset = df.columns[1:3]
df.select(subset).show()

+--------+-------+
|lastname|country|
+--------+-------+
|   Smith|    USA|
|    Rose|    USA|
|Williams|    USA|
|   Jones|    USA|
+--------+-------+



#### Selecting columns from a nested struct

In [0]:
from pyspark.sql.types import StructType, StructField, StringType
# Rows whose first field is itself a (firstname, middlename, lastname) tuple.
# Only `data` is bound here, so `df` keeps referring to the flat DataFrame
# and the earlier select cells still work when they are re-run.
data = [
    (("James", None, "Smith"), "OH", "M"),
    (("Anna", "Rose", ""), "NY", "F"),
    (("Julia", "", "Williams"), "OH", "F"),
    (("Maria", "Anne", "Jones"), "NY", "M"),
    (("Jen", "Mary", "Brown"), "NY", "M"),
    (("Mike", "Mary", "Williams"), "OH", "M"),
]

# Explicit schema: `name` is a struct of three nullable strings,
# followed by the flat `state` and `gender` string columns.
schema = StructType([
    StructField('name', StructType([
        StructField('firstname', StringType(), True),
        StructField('middlename', StringType(), True),
        StructField('lastname', StringType(), True),
    ])),
    StructField('state', StringType(), True),
    StructField('gender', StringType(), True),
])

df2 = spark.createDataFrame(data = data, schema = schema)

# Print the schema tree to confirm that `name` is a nested struct
df2.printSchema()

root
 |-- name: struct (nullable = true)
 |    |-- firstname: string (nullable = true)
 |    |-- middlename: string (nullable = true)
 |    |-- lastname: string (nullable = true)
 |-- state: string (nullable = true)
 |-- gender: string (nullable = true)



In [0]:
# Select one field of the nested `name` struct using dot notation
df2.select("name.firstname").show()

+---------+
|firstname|
+---------+
|    James|
|     Anna|
|    Julia|
|    Maria|
|      Jen|
|     Mike|
+---------+



In [0]:
# "name.*" expands every field of the `name` struct into its own top-level column
df2.select("name.*").show()

+---------+----------+--------+
|firstname|middlename|lastname|
+---------+----------+--------+
|    James|      null|   Smith|
|     Anna|      Rose|        |
|    Julia|          |Williams|
|    Maria|      Anne|   Jones|
|      Jen|      Mary|   Brown|
|     Mike|      Mary|Williams|
+---------+----------+--------+

