## 02-pyspark-string-timestamp.py

In [0]:
# 02-pyspark-string-timestamp.py
from pyspark.sql import SparkSession

# Reuse the active SparkSession if one exists, otherwise start a new one
# registered under the application name 'PySparkExamples'.
spark = (
    SparkSession.builder
    .appName('PySparkExamples')
    .getOrCreate()
)

from pyspark.sql.functions import *

# One-row sample DataFrame; the timestamp is deliberately kept as a plain
# string so the cells below can demonstrate the conversions.
df = spark.createDataFrame(
    [("1", "2019-06-24 12:01:19.000")],
    ["id", "input_timestamp"],
)
df.printSchema()
df.show()

root
 |-- id: string (nullable = true)
 |-- input_timestamp: string (nullable = true)

+---+--------------------+
| id|     input_timestamp|
+---+--------------------+
|  1|2019-06-24 12:01:...|
+---+--------------------+



In [0]:
# Timestamp string -> TimestampType (default parsing, no explicit format)
df1 = df.withColumn(
    "timestamp",
    to_timestamp(df["input_timestamp"]),
)
df1.printSchema()
df1.show(truncate=False)

root
 |-- id: string (nullable = true)
 |-- input_timestamp: string (nullable = true)
 |-- timestamp: timestamp (nullable = true)

+---+-----------------------+-------------------+
|id |input_timestamp        |timestamp          |
+---+-----------------------+-------------------+
|1  |2019-06-24 12:01:19.000|2019-06-24 12:01:19|
+---+-----------------------+-------------------+



In [0]:
# Using cast to convert TimestampType back to a string column.
# 'timestamp' is already TimestampType in df1, so it is cast directly
# rather than being passed through to_timestamp() a second time.
df2 = df1.withColumn('new_timestamp', df1['timestamp'].cast('string'))
df2.printSchema()
df2.show(truncate=False)

root
 |-- id: string (nullable = true)
 |-- input_timestamp: string (nullable = true)
 |-- timestamp: timestamp (nullable = true)
 |-- new_timestamp: string (nullable = true)

+---+-----------------------+-------------------+-------------------+
|id |input_timestamp        |timestamp          |new_timestamp      |
+---+-----------------------+-------------------+-------------------+
|1  |2019-06-24 12:01:19.000|2019-06-24 12:01:19|2019-06-24 12:01:19|
+---+-----------------------+-------------------+-------------------+



In [0]:
# Parse a literal string in a custom month-first layout into a timestamp;
# the second argument to to_timestamp() is the datetime pattern to apply.
df3 = df.select(
    to_timestamp(
        lit('06-24-2019 12:01:19.000'),
        'MM-dd-yyyy HH:mm:ss.SSSS',
    ).alias("new_column")
)
df3.printSchema()
df3.show(truncate=False)

root
 |-- new_column: timestamp (nullable = true)

+-------------------+
|new_column         |
+-------------------+
|2019-06-24 12:01:19|
+-------------------+



In [0]:
# Spark SQL: to_timestamp() on a string literal yields a timestamp column
df4 = spark.sql(
    "select to_timestamp('2019-06-24 12:01:19.000') as timestamp"
)
df4.printSchema()
df4.show()

root
 |-- timestamp: timestamp (nullable = true)

+-------------------+
|          timestamp|
+-------------------+
|2019-06-24 12:01:19|
+-------------------+



In [0]:
# Spark SQL: the timestamp() function applied to a string literal
# also yields a timestamp column
df5 = spark.sql(
    "select timestamp('2019-06-24 12:01:19.000') as timestamp"
)
df5.printSchema()
df5.show()

root
 |-- timestamp: timestamp (nullable = true)

+-------------------+
|          timestamp|
+-------------------+
|2019-06-24 12:01:19|
+-------------------+



In [0]:
# Spark SQL: string in a custom month-first layout -> timestamp,
# with the datetime pattern passed as the second argument
df6 = spark.sql(
    "select to_timestamp('06-24-2019 12:01:19.000','MM-dd-yyyy HH:mm:ss.SSSS') as timestamp"
)
df6.printSchema()
df6.show()

root
 |-- timestamp: timestamp (nullable = true)

+-------------------+
|          timestamp|
+-------------------+
|2019-06-24 12:01:19|
+-------------------+

