In [0]:
import pyspark
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType,StructField, StringType, IntegerType
from pyspark.sql.functions import when, lit, col

In [0]:
# Explicit schema for the sample rows: every column is a nullable string.
# Salary is kept as a string here and converted with an explicit cast in a
# later cell.
Schema = StructType(
    [
        StructField(column_name, StringType(), True)
        for column_name in (
            "Firstname",
            "Middlename",
            "Lastname",
            "ID",
            "Salary",
            "Country",
            "Gender",
        )
    ]
)

In [0]:
# Sample rows, one tuple per person, with values in the same order as the
# fields of `Schema`: Firstname, Middlename, Lastname, ID, Salary, Country,
# Gender.
# The Salary values are Python ints although `Schema` declares Salary as
# StringType; the printSchema() output below reports the column as string.
data = [("Saleem","Khan","R","001",2000,"INDIA","Male"),
("Salman","Khan","JW","002",3000,"Afghanistan","Male"),
("Sharuk","Khan","tw","003",5000,"INDIA","Male"),
("King","Quen","O","004",7000,"Namibia","Male"),
("Vintu","Sharma","K","005",8000,"INDIA","Male"),
("Virat","Kholi","M","006",9000,"Algeria","Female"),
("Waseem","Jaffer","K","007",12000,"INDIA","Female"),
("Jasprith","Khan","R","008",13000,"South Africa","Male"),
("Surya","Kumar","Yadaw","009",14000,"INDIA","Female"),
("Rahul","KL","R","010",15000,"Namibia","Male"),
("Mohammed","Shami","R","011",16000,"INDIA","Female"),
("Kulip","Yadav","Kumar","012",17000,"Pakistan","Male"),
("Praveen","Khana","Kumar","013",18000,"INDIA","Male"),
("Hashim","Amla","Mohammed","014",19000,"Canada","Female"),
("AB","King","Divilers","015",20000,"INDIA","Male"),
("Axar","Patil","M","016",21000,"Dominica","Female"),
# NOTE(review): ID "016" is also used by the row directly above — confirm
# whether IDs are meant to be unique before joining or de-duplicating on ID.
("Rohit","Sharma","R","016",22000,"Canada","Male"),
("Hardik","Panda","kurnal","017",23000,"Egypt","Female"),
("Wasim","Akram","Pak","018",24000,"INDIA","Male"),
("Shoib","Aktar","Pak","019",2000,"South Africa","Male")]
    


In [0]:
# Obtain the SparkSession explicitly instead of relying on a `spark` global
# injected by the hosting environment. getOrCreate() returns the already
# active session when one exists, so this is safe to re-run.
spark = SparkSession.builder.getOrCreate()

# Build the DataFrame from the in-memory rows using the explicit schema,
# then show its schema and contents.
df = spark.createDataFrame(data=data, schema=Schema)
df.printSchema()
display(df)

    


root
 |-- Firstname: string (nullable = true)
 |-- Middlename: string (nullable = true)
 |-- Lastname: string (nullable = true)
 |-- ID: string (nullable = true)
 |-- Salary: string (nullable = true)
 |-- Country: string (nullable = true)
 |-- Gender: string (nullable = true)



Firstname,Middlename,Lastname,ID,Salary,Country,Gender
Saleem,Khan,R,1,2000,INDIA,Male
Salman,Khan,JW,2,3000,Afghanistan,Male
Sharuk,Khan,tw,3,5000,INDIA,Male
King,Quen,O,4,7000,Namibia,Male
Vintu,Sharma,K,5,8000,INDIA,Male
Virat,Kholi,M,6,9000,Algeria,Female
Waseem,Jaffer,K,7,12000,INDIA,Female
Jasprith,Khan,R,8,13000,South Africa,Male
Surya,Kumar,Yadaw,9,14000,INDIA,Female
Rahul,KL,R,10,15000,Namibia,Male


In [0]:
# Project only the two name columns. The result is passed to display(), as in
# the other cells of this notebook; the previous `.display(df)` form handed
# the unrelated full DataFrame in as an extra argument.
display(df.select("Firstname", "Middlename"))

Firstname,Middlename
Saleem,Khan
Salman,Khan
Sharuk,Khan
King,Quen
Vintu,Sharma
Virat,Kholi
Waseem,Jaffer
Jasprith,Khan
Surya,Kumar
Rahul,KL


In [0]:
# Rename the Gender column to "sex". The source name is spelled exactly as in
# the schema: withColumnRenamed is a no-op when the column is not found, so a
# case mismatch would silently skip the rename under case-sensitive
# resolution. `df` itself is not modified; only the renamed view is displayed.
display(df.withColumnRenamed("Gender", "sex"))

Firstname,Middlename,Lastname,ID,Salary,Country,sex
Saleem,Khan,R,1,2000,INDIA,Male
Salman,Khan,JW,2,3000,Afghanistan,Male
Sharuk,Khan,tw,3,5000,INDIA,Male
King,Quen,O,4,7000,Namibia,Male
Vintu,Sharma,K,5,8000,INDIA,Male
Virat,Kholi,M,6,9000,Algeria,Female
Waseem,Jaffer,K,7,12000,INDIA,Female
Jasprith,Khan,R,8,13000,South Africa,Male
Surya,Kumar,Yadaw,9,14000,INDIA,Female
Rahul,KL,R,10,15000,Namibia,Male


In [0]:
# Keep only ID and Salary, then append a constant-valued "Role" column
# built from a literal.
df2 = (
    df.select("ID", "Salary")
    .withColumn("Role", lit("Sportman"))
)
display(df2)

ID,Salary,Role
1,2000,Sportman
2,3000,Sportman
3,5000,Sportman
4,7000,Sportman
5,8000,Sportman
6,9000,Sportman
7,12000,Sportman
8,13000,Sportman
9,14000,Sportman
10,15000,Sportman


In [0]:
# Keep only the rows whose Gender is 'Male' (SQL-expression filter). The
# filtered result is passed to display() directly; the previous `.display(df)`
# form handed the unfiltered DataFrame in as an unrelated extra argument.
display(df.filter("Gender == 'Male'"))

Firstname,Middlename,Lastname,ID,Salary,Country,Gender
Saleem,Khan,R,1,2000,INDIA,Male
Salman,Khan,JW,2,3000,Afghanistan,Male
Sharuk,Khan,tw,3,5000,INDIA,Male
King,Quen,O,4,7000,Namibia,Male
Vintu,Sharma,K,5,8000,INDIA,Male
Jasprith,Khan,R,8,13000,South Africa,Male
Rahul,KL,R,10,15000,Namibia,Male
Kulip,Yadav,Kumar,12,17000,Pakistan,Male
Praveen,Khana,Kumar,13,18000,INDIA,Male
AB,King,Divilers,15,20000,INDIA,Male


In [0]:
# Order rows by Firstname, breaking ties by Country (both ascending by
# default). The sorted result is passed to display() directly rather than
# handing the unsorted `df` to `.display()` as an extra argument.
display(df.sort("Firstname", "Country"))

Firstname,Middlename,Lastname,ID,Salary,Country,Gender
AB,King,Divilers,15,20000,INDIA,Male
Axar,Patil,M,16,21000,Dominica,Female
Hardik,Panda,kurnal,17,23000,Egypt,Female
Hashim,Amla,Mohammed,14,19000,Canada,Female
Jasprith,Khan,R,8,13000,South Africa,Male
King,Quen,O,4,7000,Namibia,Male
Kulip,Yadav,Kumar,12,17000,Pakistan,Male
Mohammed,Shami,R,11,16000,INDIA,Female
Praveen,Khana,Kumar,13,18000,INDIA,Male
Rahul,KL,R,10,15000,Namibia,Male


In [0]:
# Re-print the schema of `df` before the cast below: Salary is still declared
# as a string at this point.
df.printSchema()

root
 |-- Firstname: string (nullable = true)
 |-- Middlename: string (nullable = true)
 |-- Lastname: string (nullable = true)
 |-- ID: string (nullable = true)
 |-- Salary: string (nullable = true)
 |-- Country: string (nullable = true)
 |-- Gender: string (nullable = true)



In [0]:
# Derive a new DataFrame in which Salary is integer-typed; `df` keeps its
# original string column. The schema is printed to confirm the new type.
df2 = df.withColumn("Salary", col("Salary").cast(IntegerType()))
df2.printSchema()

root
 |-- Firstname: string (nullable = true)
 |-- Middlename: string (nullable = true)
 |-- Lastname: string (nullable = true)
 |-- ID: string (nullable = true)
 |-- Salary: integer (nullable = true)
 |-- Country: string (nullable = true)
 |-- Gender: string (nullable = true)

