In [0]:
from pyspark.sql import SparkSession
# Obtain the shared SparkSession (created on first use, reused afterwards).
spark = (
    SparkSession.builder
    .appName('SparkByExamples.com')
    .getOrCreate()
)

# Sample rows: (first name, last name, id kept as a string, gender).
# The None and '' entries are deliberate so the null-handling demos
# further down have something to act on.
data = [
    ("James", "Bond", "100", None),
    ("Ann", "Varsa", "200", 'F'),
    ("Tom Cruise", "XXX", "400", ''),
    ("Tom Brand", None, "400", 'M'),
]
columns = ["fname", "lname", "id", "gender"]
df = spark.createDataFrame(data, columns)

#alias
from pyspark.sql.functions import expr

# Rename columns in the projection with alias(); expr() evaluates a SQL
# snippet (string concatenation with ||) that is aliased the same way.
df.select(
    df["fname"].alias("first_name"),
    df["lname"].alias("last_name"),
    expr(" fname ||','|| lname").alias("fullName"),
).show()

#asc, desc
# Order the rows by first name, ascending first and then descending.
df.orderBy(df["fname"].asc()).show()
df.orderBy(df["fname"].desc()).show()

#cast
# id was loaded as a string; cast it to int and print the resulting schema.
df.select(df["fname"], df["id"].cast("int")).printSchema()

#between
# Keep rows whose id lies between 100 and 300 (both bounds included).
df.where(df["id"].between(100, 300)).show()

#contains
# Keep rows whose first name contains the substring "Cruise".
df.where(df["fname"].contains("Cruise")).show()

#startswith, endswith()
# Prefix and suffix matches on the first name.
df.where(df["fname"].startswith("T")).show()
df.where(df["fname"].endswith("Cruise")).show()

#eqNullSafe

#isNull & isNotNull
# First the rows with a missing last name, then the rows that have one.
df.where(df["lname"].isNull()).show()
df.where(df["lname"].isNotNull()).show()

#like , rlike
# like() uses SQL wildcards: "%om" matches values that END with "om".
# The original chain never called show(), so this demo printed nothing.
# NOTE(review): no fname in the sample data ends with "om", so the table
# printed here is empty - confirm whether "%om%" or "Tom%" was intended.
df.select(df.fname, df.lname, df.id) \
  .filter(df.fname.like("%om")) \
  .show()

# rlike() matches a regular expression instead of SQL wildcards;
# "^Tom" keeps the rows whose first name starts with "Tom".
df.select(df.fname, df.lname, df.id) \
  .filter(df.fname.rlike("^Tom")) \
  .show()

#over

#substr
# Take two characters of the first name starting at position 1 and
# expose them under the column name "substr".
df.select(
    df["fname"].substr(1, 2).alias("substr"),
).show()

#when & otherwise
from pyspark.sql.functions import when

# Translate the gender code into a label:
#   "M" -> "Male", "F" -> "Female", null -> "", anything else unchanged.
# The null branch must use isNull(): comparing a column with `== None`
# builds a SQL `= NULL` comparison, which is never true, so that branch
# could never fire and null genders fell through to otherwise().
df.select(
    df.fname,
    df.lname,
    when(df.gender == "M", "Male")
    .when(df.gender == "F", "Female")
    .when(df.gender.isNull(), "")
    .otherwise(df.gender)
    .alias("new_gender"),
).show()

#isin
# Keep only the rows whose id appears in the list of wanted ids.
li = ["100", "200"]
df.select(df["fname"], df["lname"], df["id"]).where(
    df["id"].isin(li)
).show()

from pyspark.sql.types import StructType,StructField,StringType,ArrayType,MapType

# Second sample set with nested columns: a (fname, lname) struct,
# an array of languages and a map of properties.
data = [
    (("James", "Bond"), ["Java", "C#"], {'hair': 'black', 'eye': 'brown'}),
    (("Ann", "Varsa"), [".NET", "Python"], {'hair': 'brown', 'eye': 'black'}),
    (("Tom Cruise", ""), ["Python", "Scala"], {'hair': 'red', 'eye': 'grey'}),
    (("Tom Brand", None), ["Perl", "Ruby"], {'hair': 'black', 'eye': 'blue'}),
]

# Explicit schema, assembled field by field; every field is nullable.
schema = (
    StructType()
    .add(
        'name',
        StructType()
        .add('fname', StringType(), True)
        .add('lname', StringType(), True),
    )
    .add('languages', ArrayType(StringType()), True)
    .add('properties', MapType(StringType(), StringType()), True)
)

# Rebind df to the nested DataFrame and show its structure.
df = spark.createDataFrame(data, schema)
df.printSchema()
#getItem()
# Array column: fetch the element at index 1 of each languages array.
df.select(df["languages"].getItem(1)).show()

# Map column: fetch the value stored under the key "hair".
df.select(df["properties"].getItem("hair")).show()

#getField from Struct or Map
# The same map lookup, expressed through getField().
df.select(df["properties"].getField("hair")).show()

# Struct column: pull the nested fname field out of name.
df.select(df["name"].getField("fname")).show()

#dropFields (field names are passed as separate string arguments, not as a list)
#from pyspark.sql.functions import col
#df.withColumn("name1",col("name").dropFields("fname")).show()

#withField
#from pyspark.sql.functions import lit
#df.withColumn("name",df.name.withField("fname",lit("AA"))).show()

#from pyspark.sql import Row
#from pyspark.sql.functions import lit
#df = spark.createDataFrame([Row(a=Row(b=1, c=2))])
#df.withColumn('a', df['a'].withField('b', lit(3))).select('a.b').show()
        
#from pyspark.sql import Row
#from pyspark.sql.functions import col, lit
#df = spark.createDataFrame([
#Row(a=Row(b=1, c=2, d=3, e=Row(f=4, g=5, h=6)))])
#df.withColumn('a', df['a'].dropFields('b')).show()


+----------+---------+--------------+
|first_name|last_name|      fullName|
+----------+---------+--------------+
|     James|     Bond|    James,Bond|
|       Ann|    Varsa|     Ann,Varsa|
|Tom Cruise|      XXX|Tom Cruise,XXX|
| Tom Brand|     null|          null|
+----------+---------+--------------+

+----------+-----+---+------+
|     fname|lname| id|gender|
+----------+-----+---+------+
|       Ann|Varsa|200|     F|
|     James| Bond|100|  null|
| Tom Brand| null|400|     M|
|Tom Cruise|  XXX|400|      |
+----------+-----+---+------+

+----------+-----+---+------+
|     fname|lname| id|gender|
+----------+-----+---+------+
|Tom Cruise|  XXX|400|      |
| Tom Brand| null|400|     M|
|     James| Bond|100|  null|
|       Ann|Varsa|200|     F|
+----------+-----+---+------+

root
 |-- fname: string (nullable = true)
 |-- id: integer (nullable = true)

+-----+-----+---+------+
|fname|lname| id|gender|
+-----+-----+---+------+
|James| Bond|100|  null|
|  Ann|Varsa|200|     F|
+-----+---

In [0]:
#Import the necessary modules: SparkSession from pyspark.sql.

#Create a SparkSession named 'SparkByExamples.com' using SparkSession.builder.appName('SparkByExamples.com').getOrCreate().

#Define sample data as a list of tuples.

#Define column names for the DataFrame.

#Create the DataFrame using spark.createDataFrame(data, columns).

#Perform different operations on the DataFrame:

#Alias: Use alias() or expr() to assign new column names.
#Sort: Use sort() with asc() or desc() to sort the DataFrame.
#Cast: Use cast() to change the data type of a column.
#Between: Use between() to filter rows within a range.
#Contains: Use contains() to filter rows based on substring matching.
#StartsWith and EndsWith: Use startswith() and endswith() to filter rows based on the beginning or ending of a string.
#IsNull and IsNotNull: Use isNull() and isNotNull() to filter rows based on null values.
#Like and RLike: Use like() and rlike() to filter rows based on pattern matching.
#Substr: Use substr() to extract a substring from a column.
#When and Otherwise: Use when() and otherwise() to conditionally assign values to a new column.
#IsIn: Use isin() to filter rows based on a list of values.
#GetItem: Use getItem() to access elements from an array or map column.
#GetField: Use getField() to access fields from a struct or map column.
#DropFields and WithField: Use dropFields() and withField() to remove or replace fields inside a struct column (these examples are currently commented out).
#DropFields and WithField: Further commented-out examples apply the same two functions to Row-based DataFrames with nested structs.
#The code demonstrates how to perform various data manipulation operations on DataFrames in PySpark. It includes column renaming, sorting, data type casting, filtering, substring extraction, conditional column assignment, and accessing elements and fields within complex data structures.