In [32]:
import findspark
findspark.init()

from pyspark.sql import SparkSession
from pyspark.sql.functions import col,regexp_replace,broadcast,lit

In [19]:
# Create (or reuse) a Spark session for this notebook, using the
# "local[4]" master and the application name "withColumn".
spark = (
    SparkSession.builder
    .appName("withColumn")
    .master("local[4]")
    .getOrCreate()
)

In [20]:
# Column names for the sample employee table.
schema2 = ["Emp_no", "Age", "Region", "City"]

# Sample rows as plain tuples. "Age" is stored as text here (a later cell
# casts it to integer), and "Burs@" contains an "@" that a later cell
# replaces with "a".
df = [
    ("one", "22", "South", "Burs@"),
    ("two", "33", "East", "Ankara"),
    ("three", "56", "West", "Istanbul"),
    ("four", "48", "North", "Izmir"),
]

# Build the Spark DataFrame from the tuples and the column-name list.
df2 = spark.createDataFrame(df, schema2)

In [21]:
# Print the DataFrame built from the sample tuples to check rows and column names.
df2.show()

+------+---+------+--------+
|Emp_no|Age|Region|    City|
+------+---+------+--------+
|   one| 22| South|   Burs@|
|   two| 33|  East|  Ankara|
| three| 56|  West|Istanbul|
|  four| 48| North|   Izmir|
+------+---+------+--------+



In [22]:
 # Replace the string "Age" column with an integer-typed version of itself;
 # every other column of df2 is carried over unchanged.
 df3 = df2.withColumn(
     "Age",
     col("Age").cast("Integer"),
 )

In [23]:
# Inspect the schema to confirm that "Age" is now reported as an integer column.
df3.printSchema()

root
 |-- Emp_no: string (nullable = true)
 |-- Age: integer (nullable = true)
 |-- Region: string (nullable = true)
 |-- City: string (nullable = true)



In [24]:
 # Overwrite "Age" with twice its value.
 # NOTE: df3 is rebuilt from df2 here, so the integer cast stored in df3 by
 # the earlier cell is discarded. df2's "Age" is still a string column, and
 # the product is displayed as a floating-point value (44.0, 66.0, ...).
 df3 = df2.withColumn("Age",df2["Age"]*2) # We multiplied the ages by 2

In [25]:
# Display df3 to see the doubled "Age" values.
df3.show()

+------+-----+------+--------+
|Emp_no|  Age|Region|    City|
+------+-----+------+--------+
|   one| 44.0| South|   Burs@|
|   two| 66.0|  East|  Ankara|
| three|112.0|  West|Istanbul|
|  four| 96.0| North|   Izmir|
+------+-----+------+--------+



In [26]:
# Keep the original "Age" column and store the doubled value in a separate
# "multiplied_age" column, then print the result.
df3 = df2.withColumn(
    "multiplied_age",
    col("Age") * 2,
)
df3.show()

+------+---+------+--------+--------------+
|Emp_no|Age|Region|    City|multiplied_age|
+------+---+------+--------+--------------+
|   one| 22| South|   Burs@|          44.0|
|   two| 33|  East|  Ankara|          66.0|
| three| 56|  West|Istanbul|         112.0|
|  four| 48| North|   Izmir|          96.0|
+------+---+------+--------+--------------+



In [29]:
# Clean up the "City" column: every "@" is replaced with "a"
# (e.g. "Burs@" becomes "Bursa"); other values are left untouched.
df4 = df3.withColumn(
    "City",
    regexp_replace(col("City"), "@", "a"),
)

In [30]:
# Display df4 to verify the corrected "City" value.
df4.show()

+------+---+------+--------+--------------+
|Emp_no|Age|Region|    City|multiplied_age|
+------+---+------+--------+--------------+
|   one| 22| South|   Bursa|          44.0|
|   two| 33|  East|  Ankara|          66.0|
| three| 56|  West|Istanbul|         112.0|
|  four| 48| North|   Izmir|          96.0|
+------+---+------+--------+--------------+



In [34]:
# Append a constant column: every row gets the literal value 16
# under the name "New_Column".
df5 = df4.withColumn(
    "New_Column",
    lit(16),
)

In [35]:
# Display df5, which now carries the constant "New_Column".
df5.show()

+------+---+------+--------+--------------+----------+
|Emp_no|Age|Region|    City|multiplied_age|New_Column|
+------+---+------+--------+--------------+----------+
|   one| 22| South|   Bursa|          44.0|        16|
|   two| 33|  East|  Ankara|          66.0|        16|
| three| 56|  West|Istanbul|         112.0|        16|
|  four| 48| North|   Izmir|          96.0|        16|
+------+---+------+--------+--------------+----------+



In [37]:
# Rename "New_Column" to "Numbers"; the column's values are not changed.
df6 = df5.withColumnRenamed(
    "New_Column",
    "Numbers",
)

In [38]:
# Display df6 to confirm the column now appears as "Numbers".
df6.show()

[Stage 17:>                                                         (0 + 3) / 3]

+------+---+------+--------+--------------+-------+
|Emp_no|Age|Region|    City|multiplied_age|Numbers|
+------+---+------+--------+--------------+-------+
|   one| 22| South|   Bursa|          44.0|     16|
|   two| 33|  East|  Ankara|          66.0|     16|
| three| 56|  West|Istanbul|         112.0|     16|
|  four| 48| North|   Izmir|          96.0|     16|
+------+---+------+--------+--------------+-------+



