# TO_TIMESTAMP()

In [1]:
import pyspark
from pyspark.sql import SparkSession
from pyspark.sql.functions import *
from pyspark.sql.types import *

# Obtain the Spark session used by every cell below. getOrCreate() hands back
# an already-running session when there is one instead of starting a second.
spark = SparkSession.builder.appName("example-timestamp").getOrCreate()

In [11]:
# One-row sample frame. Both values are Python strings, so the timestamp text
# is stored as a plain string column (see the printed schema).
sample_rows = [("1", "2019-06-24 12:01:19.000")]
column_names = ["id", "input_timestamp"]
df = spark.createDataFrame(sample_rows, column_names)

df.printSchema()

root
 |-- id: string (nullable = true)
 |-- input_timestamp: string (nullable = true)



In [12]:
# Timestamp string to TimestampType.
# No format argument is given, so to_timestamp() parses input_timestamp with
# Spark's default timestamp parsing; the printed schema confirms the new
# column's type is timestamp (not date).
df = df.withColumn("timestamp", to_timestamp("input_timestamp"))
df.show(truncate=False)
df.printSchema()


+---+-----------------------+-------------------+
|id |input_timestamp        |timestamp          |
+---+-----------------------+-------------------+
|1  |2019-06-24 12:01:19.000|2019-06-24 12:01:19|
+---+-----------------------+-------------------+

root
 |-- id: string (nullable = true)
 |-- input_timestamp: string (nullable = true)
 |-- timestamp: timestamp (nullable = true)



In [14]:
# Using cast to convert the TimestampType column to StringType.
# The "timestamp" column is already a timestamp, so it is cast directly
# instead of being passed through to_timestamp() a second time.
df = df.withColumn("timestamp_string", df["timestamp"].cast("string"))
df.printSchema()

root
 |-- id: string (nullable = true)
 |-- input_timestamp: string (nullable = true)
 |-- timestamp: timestamp (nullable = true)
 |-- timestamp_string: string (nullable = true)



## Custom string format to Timestamp type

In [15]:
# Parse a month-day-year literal by supplying an explicit pattern.
# NOTE(review): the pattern has four fraction letters (SSSS) while the literal
# carries three fraction digits; the recorded output shows it parsing, but
# confirm on the Spark version in use.
custom_text = lit('06-24-2019 12:01:19.000')
custom_pattern = 'MM-dd-yyyy HH:mm:ss.SSSS'
df.select(to_timestamp(custom_text, custom_pattern)).show()

+-------------------------------------------------------------------+
|to_timestamp('06-24-2019 12:01:19.000', 'MM-dd-yyyy HH:mm:ss.SSSS')|
+-------------------------------------------------------------------+
|                                                2019-06-24 12:01:19|
+-------------------------------------------------------------------+



## SQL Example

In [18]:
# The three SQL variants are run in order; each prints a one-row result.
timestamp_queries = [
    # SQL string to TimestampType
    "select to_timestamp('2019-06-24 12:01:19.000') as timestamp",
    # SQL CAST timestamp string to TimestampType
    "select timestamp('2019-06-24 12:01:19.000') as timestamp",
    # SQL Custom string to TimestampType
    "select to_timestamp('06-24-2019 12:01:19.000','MM-dd-yyyy HH:mm:ss.SSSS') as timestamp",
]
for query in timestamp_queries:
    spark.sql(query).show()


+-------------------+
|          timestamp|
+-------------------+
|2019-06-24 12:01:19|
+-------------------+

+-------------------+
|          timestamp|
+-------------------+
|2019-06-24 12:01:19|
+-------------------+

+-------------------+
|          timestamp|
+-------------------+
|2019-06-24 12:01:19|
+-------------------+



Which of the following code blocks creates a new one-column, two-row DataFrame `dfDates` with column `date` of type timestamp?

- Option 1:
  - `dfDates = spark.createDataFrame(["23/01/2022 11:28:12","24/01/2022 10:58:34"], ["date"])`
  - `dfDates = dfDates.withColumn("date", to_timestamp("dd/MM/yyyy HH:mm:ss", "date"))`
- Option 2:
  - `dfDates = spark.createDataFrame([("23/01/2022 11:28:12",),("24/01/2022 10:58:34",)], ["date"])`
  - `dfDates = dfDates.withColumnRenamed("date", to_timestamp("date", "yyyy-MM-dd HH:mm:ss"))`
- Option 3:
  - `dfDates = spark.createDataFrame([("23/01/2022 11:28:12",),("24/01/2022 10:58:34",)], ["date"])`
  - `dfDates = dfDates.withColumn("date", to_timestamp("date", "dd/MM/yyyy HH:mm:ss"))`

In [29]:
# Build the string-typed frame inside this cell so it runs on a fresh kernel
# and always starts from the raw strings when re-run, then parse the "date"
# column with the day/month/year pattern that matches the text.
dfDates = spark.createDataFrame(
    [("23/01/2022 11:28:12",), ("24/01/2022 10:58:34",)],
    ["date"],
)
dfDates = dfDates.withColumn("date", to_timestamp("date", "dd/MM/yyyy HH:mm:ss"))
dfDates.printSchema()
dfDates.show()

root
 |-- date: timestamp (nullable = true)

+-------------------+
|               date|
+-------------------+
|2022-01-23 11:28:12|
|2022-01-24 10:58:34|
+-------------------+



In [25]:
# Two one-element tuples become two rows of a single string column "date".
date_rows = [("23/01/2022 11:28:12",), ("24/01/2022 10:58:34",)]
dfDates = spark.createDataFrame(date_rows, ["date"])
dfDates.printSchema()
dfDates.show()

root
 |-- date: string (nullable = true)

+-------------------+
|               date|
+-------------------+
|23/01/2022 11:28:12|
|24/01/2022 10:58:34|
+-------------------+



In [26]:
# Create dfDates from row tuples; the column holds the raw text, so the
# schema printed below reports it as string.
dfDates = spark.createDataFrame(
    data=[("23/01/2022 11:28:12",), ("24/01/2022 10:58:34",)],
    schema=["date"],
)
dfDates.printSchema()
dfDates.show()

root
 |-- date: string (nullable = true)

+-------------------+
|               date|
+-------------------+
|23/01/2022 11:28:12|
|24/01/2022 10:58:34|
+-------------------+



In [None]:
# The statements below are the incorrect quiz options, kept verbatim. Each
# comment records the error noted for the statement beneath it.
# TypeError: Can not infer schema for type: <class 'str'>
# The rows are bare strings rather than one-element tuples such as ("...",).
dfDates = spark.createDataFrame(["23/01/2022 11:28:12","24/01/2022 10:58:34"], ["date"])

# TypeError: Column is not iterable
# withColumnRenamed is handed a Column expression as its second argument.
# NOTE(review): presumably it expects the new column name as a string — confirm.
dfDates = dfDates.withColumnRenamed("date", to_timestamp("date", "yyyy-MM-dd HH:mm:ss"))

# NameError: name 'dfDates' is not defined
# NOTE(review): this NameError presumably arises because the createDataFrame
# call above failed and never bound dfDates, not from this call itself. The
# arguments are also in the opposite order to the working call
# to_timestamp("date", "dd/MM/yyyy HH:mm:ss") — confirm what is raised when
# dfDates exists.
dfDates = dfDates.withColumn("date", to_timestamp("dd/MM/yyyy HH:mm:ss", "date"))
