In [0]:
from pyspark.sql.functions import *

In [0]:
# Sample rows as (date, time) string pairs: the first column is compact
# yyyyMMdd text, the second is dd-MMM-yyyy HH:mm:ss.SSS text.
datetimes = list(zip(
    ["20140228", "20160329", "20180420", "20190512"],
    [
        "28-Feb-2014 10:00:00.123",
        "29-Mar-2016 11:23:00.234",
        "20-Apr-2018 12:34:00.543",
        "12-May-2019 13:21:00.567",
    ],
))

In [0]:
# Build the demo frame; both columns are deliberately kept as strings so the
# parsing functions can be applied to them later.
datedf = spark.createDataFrame(
    data=datetimes,
    schema='date STRING, time STRING',
)

In [0]:
# Display the raw rows without shortening the long time strings.
datedf.show(n=20, truncate=False)

+--------+------------------------+
|date    |time                    |
+--------+------------------------+
|20140228|28-Feb-2014 10:00:00.123|
|20160329|29-Mar-2016 11:23:00.234|
|20180420|20-Apr-2018 12:34:00.543|
|20190512|12-May-2019 13:21:00.567|
+--------+------------------------+



In [0]:
# Print the signature and docstring of to_date before using it below.
help(to_date)

Help on function to_date in module pyspark.sql.functions:

to_date(col: 'ColumnOrName', format: Optional[str] = None) -> pyspark.sql.column.Column
    Converts a :class:`~pyspark.sql.Column` into :class:`pyspark.sql.types.DateType`
    using the optionally specified format. Specify formats according to `datetime pattern`_.
    By default, it follows casting rules to :class:`pyspark.sql.types.DateType` if the format
    is omitted. Equivalent to ``col.cast("date")``.
    
    .. _datetime pattern: https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html
    
    .. versionadded:: 2.2.0
    
    .. versionchanged:: 3.4.0
        Supports Spark Connect.
    
    Parameters
    ----------
    col : :class:`~pyspark.sql.Column` or str
        input column of values to convert.
    format: str, optional
        format to use to convert date values.
    
    Returns
    -------
    :class:`~pyspark.sql.Column`
        date value as :class:`pyspark.sql.types.DateType` type.
    
    

In [0]:
# A single one-column row; it backs a dummy frame that the literal-parsing
# examples select against.
l = [
    ('X',),
]

In [0]:
# One-row, one-column frame used only as a target for select() on literals.
df = spark.createDataFrame(
    data=l,
    schema="dummy string",
)

In [0]:
# Display the dummy frame using show()'s default limits, spelled out.
df.show(n=20, truncate=True)

+-----+
|dummy|
+-----+
|    X|
+-----+



In [0]:
# Compact form: four-digit year, two-digit month, two-digit day, no separators.
df.select(
    to_date(lit('20230302'), format='yyyyMMdd').alias('to_date')
).show()

+----------+
|   to_date|
+----------+
|2023-03-02|
+----------+



In [0]:
# Ordinal form: four-digit year followed by the three-digit day of the year
# (day 061 of 2021 is 2 March).
df.select(
    to_date(lit('2021061'), format='yyyyDDD').alias('to_date')
).show()

+----------+
|   to_date|
+----------+
|2021-03-02|
+----------+



In [0]:
# Day-first numeric date with slash separators.
df.select(
    to_date(lit('02/03/2021'), format='dd/MM/yyyy').alias('to_date')
).show()

+----------+
|   to_date|
+----------+
|2021-03-02|
+----------+



In [0]:
# Day-first numeric date with hyphen separators.
df.select(
    to_date(lit('02-03-2021'), format='dd-MM-yyyy').alias('to_date')
).show()

+----------+
|   to_date|
+----------+
|2021-03-02|
+----------+



In [0]:
# Day-first date with an abbreviated month name (MMM).
df.select(
    to_date(lit('02-Mar-2021'), format='dd-MMM-yyyy').alias('to_date')
).show()

+----------+
|   to_date|
+----------+
|2021-03-02|
+----------+



In [0]:
# Long form: full month name (MMMM), unpadded day, then the year.
df.select(
    to_date(lit('March 2, 2021'), format='MMMM d, yyyy').alias('to_date')
).show()

+----------+
|   to_date|
+----------+
|2021-03-02|
+----------+



In [0]:
# Print the signature and docstring of to_timestamp before using it below.
help(to_timestamp)

Help on function to_timestamp in module pyspark.sql.functions:

to_timestamp(col: 'ColumnOrName', format: Optional[str] = None) -> pyspark.sql.column.Column
    Converts a :class:`~pyspark.sql.Column` into :class:`pyspark.sql.types.TimestampType`
    using the optionally specified format. Specify formats according to `datetime pattern`_.
    By default, it follows casting rules to :class:`pyspark.sql.types.TimestampType` if the format
    is omitted. Equivalent to ``col.cast("timestamp")``.
    
    .. _datetime pattern: https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html
    
    .. versionadded:: 2.2.0
    
    .. versionchanged:: 3.4.0
        Supports Spark Connect.
    
    Parameters
    ----------
    col : :class:`~pyspark.sql.Column` or str
        column values to convert.
    format: str, optional
        format to use to convert timestamp values.
    
    Returns
    -------
    :class:`~pyspark.sql.Column`
        timestamp value as :class:`pyspark.sql.types.TimestampType` type.

In [0]:
# A date-only pattern still produces a timestamp (the time part comes back as
# 00:00:00), so the column is labelled after the function that produced it.
df.select(
    to_timestamp(lit('02-Mar-2021'), format='dd-MMM-yyyy').alias('to_timestamp')
).show()

+-------------------+
|            to_date|
+-------------------+
|2021-03-02 00:00:00|
+-------------------+



In [0]:
# Date plus 24-hour time of day; the result is a timestamp, so the column is
# labelled to_timestamp to match what it holds.
df.select(
    to_timestamp(
        lit('02-Mar-2021 17:30:15'),
        format='dd-MMM-yyyy HH:mm:ss',
    ).alias('to_timestamp')
).show()

+-------------------+
|            to_date|
+-------------------+
|2021-03-02 17:30:15|
+-------------------+



In [0]:
# Confirm both columns are still plain strings before converting them.
datedf.printSchema()

root
 |-- date: string (nullable = true)
 |-- time: string (nullable = true)



In [0]:
# Re-display the raw string rows, untruncated, ahead of the conversion.
datedf.show(n=20, truncate=False)

+--------+------------------------+
|date    |time                    |
+--------+------------------------+
|20140228|28-Feb-2014 10:00:00.123|
|20160329|29-Mar-2016 11:23:00.234|
|20180420|20-Apr-2018 12:34:00.543|
|20190512|12-May-2019 13:21:00.567|
+--------+------------------------+



In [0]:
# Parse both raw string columns side by side: `date` with the compact
# yyyyMMdd pattern, `time` with a month-name pattern that keeps milliseconds.
(
    datedf
    .withColumn("to_date", to_date(col('date'), format='yyyyMMdd'))
    .withColumn(
        "to_timestamp",
        to_timestamp(col('time'), format='dd-MMM-yyyy HH:mm:ss.SSS'),
    )
    .show(truncate=False)
)

+--------+------------------------+----------+-----------------------+
|date    |time                    |to_date   |to_timestamp           |
+--------+------------------------+----------+-----------------------+
|20140228|28-Feb-2014 10:00:00.123|2014-02-28|2014-02-28 10:00:00.123|
|20160329|29-Mar-2016 11:23:00.234|2016-03-29|2016-03-29 11:23:00.234|
|20180420|20-Apr-2018 12:34:00.543|2018-04-20|2018-04-20 12:34:00.543|
|20190512|12-May-2019 13:21:00.567|2019-05-12|2019-05-12 13:21:00.567|
+--------+------------------------+----------+-----------------------+

