In [1]:
import findspark


In [2]:
# Must run before the `import pyspark` cell: findspark.init() is expected to
# locate the local Spark installation and make the pyspark package importable.
findspark.init()

In [3]:
import pyspark


In [4]:
from pyspark.sql import SparkSession

In [5]:
 
# Obtain the notebook-wide Spark session, reusing an existing one if present.
spark = (
    SparkSession.builder
    .appName('sparkdf')
    .getOrCreate()
)


In [6]:

# Sample rows; every field, including the ID, is a string.
data = [
    ["1", "sravan", "company 1"],
    ["2", "ojaswi", "company 1"],
    ["3", "rohith", "company 2"],
    ["4", "sridevi", "company 1"],
    ["5", "bobby", "company 1"],
]


In [7]:
# Column names, in the same order as the fields of each row in `data`.
columns = [
    "ID",
    "NAME",
    "Company",
]

In [8]:
# Build the demo DataFrame; only column names are supplied, so the column
# types come from the Python values in `data` (all strings here).
dataframe = spark.createDataFrame(data=data, schema=columns)

In [9]:
# Print the DataFrame's rows as a text table to check the data loaded as expected.
dataframe.show()

+---+-------+---------+
| ID|   NAME|  Company|
+---+-------+---------+
|  1| sravan|company 1|
|  2| ojaswi|company 1|
|  3| rohith|company 2|
|  4|sridevi|company 1|
|  5|  bobby|company 1|
+---+-------+---------+



In [11]:
from pyspark.sql.functions import lit

In [12]:
# Add a "salary" column holding the same literal value (34000) on every row.
# withColumn returns a new DataFrame; `dataframe` itself is left unchanged.
(
    dataframe
    .withColumn("salary", lit(34000))
    .show()
)

+---+-------+---------+------+
| ID|   NAME|  Company|salary|
+---+-------+---------+------+
|  1| sravan|company 1| 34000|
|  2| ojaswi|company 1| 34000|
|  3| rohith|company 2| 34000|
|  4|sridevi|company 1| 34000|
|  5|  bobby|company 1| 34000|
+---+-------+---------+------+



In [13]:
# Derive "salary" as ID * 2300. ID was created from string literals, so it is
# cast explicitly instead of relying on Spark's implicit string-to-number
# coercion; casting to "double" keeps the fractional values (2300.0, ...) that
# the recorded output shows. Bracket access avoids any clash between a column
# name and a DataFrame attribute.
(
    dataframe
    .withColumn("salary", dataframe["ID"].cast("double") * 2300)
    .show()
)

+---+-------+---------+-------+
| ID|   NAME|  Company| salary|
+---+-------+---------+-------+
|  1| sravan|company 1| 2300.0|
|  2| ojaswi|company 1| 4600.0|
|  3| rohith|company 2| 6900.0|
|  4|sridevi|company 1| 9200.0|
|  5|  bobby|company 1|11500.0|
+---+-------+---------+-------+



In [14]:
from pyspark.sql.functions import concat_ws

In [15]:
# Add a "Details" column that joins NAME and Company with a "-" separator.
(
    dataframe
    .withColumn("Details", concat_ws("-", "NAME", 'Company'))
    .show()
)


+---+-------+---------+-----------------+
| ID|   NAME|  Company|          Details|
+---+-------+---------+-----------------+
|  1| sravan|company 1| sravan-company 1|
|  2| ojaswi|company 1| ojaswi-company 1|
|  3| rohith|company 2| rohith-company 2|
|  4|sridevi|company 1|sridevi-company 1|
|  5|  bobby|company 1|  bobby-company 1|
+---+-------+---------+-----------------+



In [10]:
# Column names for the employee table, in tuple-field order.
empColumns = [
    "emp_id",
    "name",
    "superior_emp_id",
    "year_joined",
    "emp_dept_id",
    "gender",
    "salary",
]

In [11]:
# Employee rows: ids and salary are ints, while year_joined and emp_dept_id
# are strings. The -1 values and empty gender strings look like
# "unknown / not applicable" placeholders -- TODO confirm.
emp = [
    (1, "Smith", -1, "2018", "10", "M", 3000),
    (2, "Rose", 1, "2010", "20", "M", 4000),
    (3, "Williams", 1, "2010", "10", "M", 1000),
    (4, "Jones", 2, "2005", "10", "F", 2000),
    (5, "Brown", 2, "2010", "40", "", -1),
    (6, "Brown", 2, "2010", "50", "", -1),
]

In [12]:
# Build the employee DataFrame from the tuples in `emp`, naming the columns
# after `empColumns`.
empDF = spark.createDataFrame(
    data=emp,
    schema=empColumns,
)

In [13]:
# Print the employee rows as a text table.
empDF.show()

+------+--------+---------------+-----------+-----------+------+------+
|emp_id|    name|superior_emp_id|year_joined|emp_dept_id|gender|salary|
+------+--------+---------------+-----------+-----------+------+------+
|     1|   Smith|             -1|       2018|         10|     M|  3000|
|     2|    Rose|              1|       2010|         20|     M|  4000|
|     3|Williams|              1|       2010|         10|     M|  1000|
|     4|   Jones|              2|       2005|         10|     F|  2000|
|     5|   Brown|              2|       2010|         40|      |    -1|
|     6|   Brown|              2|       2010|         50|      |    -1|
+------+--------+---------------+-----------+-----------+------+------+



In [14]:
# Column names for the department table, in tuple-field order.
deptColumns = [
    "dept_name",
    "dept_id",
]


In [15]:
# Department rows as (name, id); the id is an int.
dept = [
    ("Finance", 10),
    ("Marketing", 20),
    ("Sales", 30),
    ("IT", 40),
]


In [16]:
# Build the department DataFrame from the tuples in `dept`, naming the columns
# after `deptColumns`.
deptDF = spark.createDataFrame(
    data=dept,
    schema=deptColumns,
)


In [17]:
# Print the department rows as a text table.
deptDF.show()

+---------+-------+
|dept_name|dept_id|
+---------+-------+
|  Finance|     10|
|Marketing|     20|
|    Sales|     30|
|       IT|     40|
+---------+-------+



In [18]:
# Inner-join employees to departments on the department id. emp_dept_id holds
# strings while dept_id holds integers, so the key is cast explicitly rather
# than left to an implicit comparison cast. With an inner join, rows without a
# partner on the other side are dropped (the employee in dept 50 and the
# "Sales" department do not appear in the result).
(
    empDF
    .join(deptDF, empDF.emp_dept_id.cast("long") == deptDF.dept_id, "inner")
    .show()
)


+------+--------+---------------+-----------+-----------+------+------+---------+-------+
|emp_id|    name|superior_emp_id|year_joined|emp_dept_id|gender|salary|dept_name|dept_id|
+------+--------+---------------+-----------+-----------+------+------+---------+-------+
|     1|   Smith|             -1|       2018|         10|     M|  3000|  Finance|     10|
|     3|Williams|              1|       2010|         10|     M|  1000|  Finance|     10|
|     4|   Jones|              2|       2005|         10|     F|  2000|  Finance|     10|
|     2|    Rose|              1|       2010|         20|     M|  4000|Marketing|     20|
|     5|   Brown|              2|       2010|         40|      |    -1|       IT|     40|
+------+--------+---------------+-----------+-----------+------+------+---------+-------+

