In [0]:
%run "./01 - Preparing dataFrame"

In [0]:
from pyspark.sql.functions import *

+---+----------+------------+--------------------+--------------------+-------+-----------+-----------+-------------+-------------------+
| id|first_name|   last_name|               email|       phone_numbers|courses|is_customer|amount_paid|customer_from|    last_updated_ts|
+---+----------+------------+--------------------+--------------------+-------+-----------+-----------+-------------+-------------------+
|  1|    Corrie|Van den Oord|cvandenoord0@etsy...|{+1 234 567 8901,...| [1, 2]|       true|    1000.55|   2021-01-15|2021-02-10 01:15:00|
|  2|  Nikolaus|     Brewitt|nbrewitt1@dailyma...|{+1 234 567 8923,...|    [3]|       true|      900.0|   2021-02-14|2021-02-18 03:33:00|
|  3|    Orelie|      Penney|openney2@vistapri...|{+1 714 512 9752,...| [2, 4]|       true|     850.55|   2021-01-21|2021-03-15 15:16:55|
|  4|     Ashby|    Maddocks|  amaddocks3@home.pl|        {NULL, NULL}|     []|      false|        NaN|         NULL|2021-04-10 17:45:30|
|  5|      Kurt|        Rome|krome

In [0]:
# RENAMING COLUMNS

# As DataFrame methods (called on the data frame itself):
#   .withColumn(col_name, expression)
#   .withColumnRenamed(old_col_name, new_col_name)
#   .toDF(*new_col_names)

# As a column expression (inside select):
#   col().alias(new_col_name)

In [0]:
# .withColumn(col_name, expression)
# Adds a 'full_name' column built from first_name and last_name joined by ', '.

(
    users_df
    .select('id', 'first_name', 'last_name')
    .withColumn(
        'full_name',
        concat(col('first_name'), lit(', '), col('last_name')),
    )
    .show()
)

+---+----------+------------+--------------------+
| id|first_name|   last_name|           full_name|
+---+----------+------------+--------------------+
|  1|    Corrie|Van den Oord|Corrie, Van den Oord|
|  2|  Nikolaus|     Brewitt|   Nikolaus, Brewitt|
|  3|    Orelie|      Penney|      Orelie, Penney|
|  4|     Ashby|    Maddocks|     Ashby, Maddocks|
|  5|      Kurt|        Rome|          Kurt, Rome|
+---+----------+------------+--------------------+



In [0]:
# .withColumn with a column name that already exists:
# the existing column is overwritten instead of a second one being added
# (here first_name ends up holding the last_name values).

(
    users_df
    .select('id', 'first_name', 'last_name')
    .withColumn('first_name', col('last_name'))
    .show()
)

+---+------------+------------+
| id|  first_name|   last_name|
+---+------------+------------+
|  1|Van den Oord|Van den Oord|
|  2|     Brewitt|     Brewitt|
|  3|      Penney|      Penney|
|  4|    Maddocks|    Maddocks|
|  5|        Rome|        Rome|
+---+------------+------------+



In [0]:
# .withColumnRenamed(old_col_name, new_col_name)
# Renames one column per call; chain calls to rename several columns.

(
    users_df
    .select('id', 'first_name', 'last_name')
    .withColumnRenamed('id', 'user_id')
    .withColumnRenamed('first_name', 'user_first_name')
    .show()
)

+-------+---------------+------------+
|user_id|user_first_name|   last_name|
+-------+---------------+------------+
|      1|         Corrie|Van den Oord|
|      2|       Nikolaus|     Brewitt|
|      3|         Orelie|      Penney|
|      4|          Ashby|    Maddocks|
|      5|           Kurt|        Rome|
+-------+---------------+------------+



In [0]:
# .toDF(*new_col_names)
# Renames every selected column in one call; new names are applied by position.

original_col_names = ['id', 'first_name', 'last_name']
target_col_names = ['user_id', 'user_first_name', 'user_last_name']

(
    users_df
    .select(*original_col_names)
    .toDF(*target_col_names)
    .show()
)

+-------+---------------+--------------+
|user_id|user_first_name|user_last_name|
+-------+---------------+--------------+
|      1|         Corrie|  Van den Oord|
|      2|       Nikolaus|       Brewitt|
|      3|         Orelie|        Penney|
|      4|          Ashby|      Maddocks|
|      5|           Kurt|          Rome|
+-------+---------------+--------------+



In [0]:
# col().alias(new_col_name)
# Names a column expression directly inside select().

(
    users_df
    .select(
        'id',
        'first_name',
        'last_name',
        concat(col('first_name'), lit(', '), col('last_name')).alias('full_name'),
    )
    .show()
)

+---+----------+------------+--------------------+
| id|first_name|   last_name|           full_name|
+---+----------+------------+--------------------+
|  1|    Corrie|Van den Oord|Corrie, Van den Oord|
|  2|  Nikolaus|     Brewitt|   Nikolaus, Brewitt|
|  3|    Orelie|      Penney|      Orelie, Penney|
|  4|     Ashby|    Maddocks|     Ashby, Maddocks|
|  5|      Kurt|        Rome|          Kurt, Rome|
+---+----------+------------+--------------------+

