In [0]:
from pyspark.sql.functions import *

In [0]:
# Print the built-in documentation for lpad (signature, parameters, example)
# before using it in the cells below.
help(lpad)

Help on function lpad in module pyspark.sql.functions:

lpad(col: 'ColumnOrName', len: int, pad: str) -> pyspark.sql.column.Column
    Left-pad the string column to width `len` with `pad`.
    
    .. versionadded:: 1.5.0
    
    .. versionchanged:: 3.4.0
        Supports Spark Connect.
    
    Parameters
    ----------
    col : :class:`~pyspark.sql.Column` or str
        target column to work on.
    len : int
        length of the final string.
    pad : str
        chars to prepend.
    
    Returns
    -------
    :class:`~pyspark.sql.Column`
        left padded result.
    
    Examples
    --------
    >>> df = spark.createDataFrame([('abcd',)], ['s',])
    >>> df.select(lpad(df.s, 6, '#').alias('s')).collect()
    [Row(s='##abcd')]



In [0]:
# A single one-column row; it only serves as a carrier frame so that
# literal-based expressions can be evaluated with select().
l = [("X",)]

In [0]:
# One-row DataFrame with a single string column named "dummy".
df = spark.createDataFrame(data=l, schema="dummy string")

In [0]:
# Display the one-row carrier frame.
df.show()

+-----+
|dummy|
+-----+
|    X|
+-----+



In [0]:
# Left-pad the literal "hello" with "-" up to a total width of 10 characters
# and show the result under the column name "dummy".
(
    df
    .select(lpad(lit("hello"), 10, "-").alias("dummy"))
    .show()
)

+----------+
|     dummy|
+----------+
|-----hello|
+----------+



In [0]:
# Sample employee rows. Field order per tuple:
# (employee_id, first_name, last_name, salary, nationality, phone_number, ssn)
employees = [
    (1, "Scott", "Tiger",  1000.0, "united states",  "+1 123 456 7890",  "123 45 6789"),
    (2, "Henry", "Ford",   1250.0, "india",          "+91 234 567 8901", "456 78 9123"),
    (3, "Nick",  "Junior",  750.0, "united kingdom", "+44 111 111 111",  "222 33 4444"),
    (4, "Bill",  "Gomes",  1500.0, "australia",      "+61 987 654 3210", "789 12 6118"),
]

In [0]:
# Build the employee DataFrame with an explicit DDL schema string, so the
# column names and types are declared rather than inferred from the tuples.
empdf = spark.createDataFrame(
    data=employees,
    schema="""employee_id INT, first_name STRING,
                              last_name STRING, salary FLOAT, nationality STRING,
                              phone_number STRING, ssn STRING                              
                               """,
)

In [0]:
# Display the employee rows to confirm the data loaded under the declared columns.
empdf.show()

+-----------+----------+---------+------+--------------+----------------+-----------+
|employee_id|first_name|last_name|salary|   nationality|    phone_number|        ssn|
+-----------+----------+---------+------+--------------+----------------+-----------+
|          1|     Scott|    Tiger|1000.0| united states| +1 123 456 7890|123 45 6789|
|          2|     Henry|     Ford|1250.0|         india|+91 234 567 8901|456 78 9123|
|          3|      Nick|   Junior| 750.0|united kingdom| +44 111 111 111|222 33 4444|
|          4|      Bill|    Gomes|1500.0|     australia|+61 987 654 3210|789 12 6118|
+-----------+----------+---------+------+--------------+----------------+-----------+



In [0]:
# Assemble one fixed-width text record per employee by padding each field to a
# set width and concatenating the pieces in column order.
employee_record = concat(
    lpad("employee_id", 5, "0"),     # width 5, filled with "0" on the left
    rpad("first_name", 10, "-"),     # width 10, filled with "-" on the right
    rpad("last_name", 10, "-"),      # width 10, filled with "-" on the right
    lpad("salary", 10, "0"),         # width 10, filled with "0" on the left
    rpad("nationality", 15, "-"),    # width 15, filled with "-" on the right
    rpad("phone_number", 17, "-"),   # width 17, filled with "-" on the right
    "ssn",                           # appended as-is, without padding
)
empfixedDf = empdf.select(employee_record.alias("employee"))

In [0]:
# truncate=False prints the full concatenated record instead of a shortened
# preview, so the padding of every field can be inspected.
empfixedDf.show(truncate=False)

+------------------------------------------------------------------------------+
|employee                                                                      |
+------------------------------------------------------------------------------+
|00001Scott-----Tiger-----00001000.0united states--+1 123 456 7890--123 45 6789|
|00002Henry-----Ford------00001250.0india----------+91 234 567 8901-456 78 9123|
|00003Nick------Junior----00000750.0united kingdom-+44 111 111 111--222 33 4444|
|00004Bill------Gomes-----00001500.0australia------+61 987 654 3210-789 12 6118|
+------------------------------------------------------------------------------+

