In [1]:
# findspark.init() is called before the pyspark import below; keep that order.
# NOTE(review): presumably this call is what makes pyspark importable in this
# environment -- confirm against the findspark documentation.
import findspark
findspark.init()

from pyspark.sql import SparkSession

# Bind the session to `spark`; later cells call spark.createDataFrame(...) on it.
spark = SparkSession.builder.getOrCreate()

In [3]:
from pyspark.sql.functions import lit

# Wrap a plain Python string with lit() and keep the result in col1.
col1 = lit("Test_Column")

# Print the Python type of the object lit() returned.
print(type(col1))

<class 'pyspark.sql.column.Column'>


In [4]:
# Sample rows; each tuple is laid out as (Name, Gender, Salary).
data = [
    ("Srikanth", "M", 2000),
    ("Manu", "M", 3000),
]

# Only column names are supplied here -- no column types are specified.
schema = ["Name", "Gender", "Salary"]

df = spark.createDataFrame(data=data, schema=schema)

# Print the schema first, then the rows with truncate=False.
df.printSchema()
df.show(truncate=False)

root
 |-- Name: string (nullable = true)
 |-- Gender: string (nullable = true)
 |-- Salary: long (nullable = true)

+--------+------+------+
|Name    |Gender|Salary|
+--------+------+------+
|Srikanth|M     |2000  |
|Manu    |M     |3000  |
+--------+------+------+



In [5]:
# Build df1 from df by adding a column named NewCol that holds the literal
# string 'NewColValue'; df itself is not reassigned.
df1 = df.withColumn(
    "NewCol",
    lit("NewColValue"),
)

df1.printSchema()
df1.show(truncate=False)

root
 |-- Name: string (nullable = true)
 |-- Gender: string (nullable = true)
 |-- Salary: long (nullable = true)
 |-- NewCol: string (nullable = false)

+--------+------+------+-----------+
|Name    |Gender|Salary|NewCol     |
+--------+------+------+-----------+
|Srikanth|M     |2000  |NewColValue|
|Manu    |M     |3000  |NewColValue|
+--------+------+------+-----------+



In [10]:
# A column can be referenced in several ways; three of them are shown below.

from pyspark.sql.functions import col

# 1. Attribute access on the DataFrame object.
df1.select(df1.Name).show()

# 2. Indexing the DataFrame with the column name.
df1.select(df1["Gender"]).show()

# 3. The standalone col() function, given the column name.
df1.select(col("Salary")).show()

+--------+
|    Name|
+--------+
|Srikanth|
|    Manu|
+--------+

+------+
|Gender|
+------+
|     M|
|     M|
+------+

+------+
|Salary|
+------+
|  2000|
|  3000|
+------+



In [17]:
from pyspark.sql.types import StructType, StructField, StringType, IntegerType

# Each row is (Name, Gender, Salary, (Hair, Eyes)); the inner tuple supplies
# the values for the nested Props struct declared below.
data = [
    ("Sri", "M", 2000, ("Black", "Blue")),
    ("Akshainie", "F", 4000, ("Black", "Black")),
]

# Struct type for the nested Props column: two string fields.
properties = StructType(
    [
        StructField("Hair", StringType()),
        StructField("Eyes", StringType()),
    ]
)

# Explicit top-level schema; Props reuses the struct type defined above.
schema = StructType(
    [
        StructField("Name", StringType()),
        StructField("Gender", StringType()),
        StructField("Salary", IntegerType()),
        StructField("Props", properties),
    ]
)

df2 = spark.createDataFrame(data, schema)

# Rows first, then the schema tree.
df2.show(truncate=False)

df2.printSchema()
                                           

+---------+------+------+--------------+
|Name     |Gender|Salary|Props         |
+---------+------+------+--------------+
|Sri      |M     |2000  |{Black, Blue} |
|Akshainie|F     |4000  |{Black, Black}|
+---------+------+------+--------------+

root
 |-- Name: string (nullable = true)
 |-- Gender: string (nullable = true)
 |-- Salary: integer (nullable = true)
 |-- Props: struct (nullable = true)
 |    |-- Hair: string (nullable = true)
 |    |-- Eyes: string (nullable = true)



In [18]:
# Select the nested Eyes and Hair fields through chained attribute access
# on the Props struct column.
df2.select(
    df2.Props.Eyes,
    df2.Props.Hair,
).show()

+----------+----------+
|Props.Eyes|Props.Hair|
+----------+----------+
|      Blue|     Black|
|     Black|     Black|
+----------+----------+



In [20]:
# Select the same nested fields by indexing the DataFrame with a dotted
# 'struct.field' path.
df2.select(
    df2["Props.Eyes"],
    df2["Props.Hair"],
).show()

+-----+-----+
| Eyes| Hair|
+-----+-----+
| Blue|Black|
|Black|Black|
+-----+-----+



In [23]:
from pyspark.sql.functions import col

# Select the nested fields by passing a dotted 'struct.field' path to col().
df2.select(
    col("Props.Eyes"),
    col("Props.Hair"),
).show()

+-----+-----+
| Eyes| Hair|
+-----+-----+
| Blue|Black|
|Black|Black|
+-----+-----+

