In [0]:
from pyspark.sql import SparkSession

# Create SparkSession
spark = SparkSession.builder \
          .appName('SparkByExamples.com') \
          .getOrCreate()

from pyspark.sql.functions import *

df=spark.createDataFrame(
        data = [ ("1","2019-06-24 12:01:19.000")],
        schema=["id","input_timestamp"])
df.printSchema()

#Timestamp String to DateType
df.withColumn("timestamp",to_timestamp("input_timestamp")) \
  .show(truncate=False)
  
# Using Cast to convert TimestampType to DateType
df.withColumn('timestamp', \
         to_timestamp('input_timestamp').cast('string')) \
  .show(truncate=False)
  

df.select(to_timestamp(lit('06-24-2019 12:01:19.000'),'MM-dd-yyyy HH:mm:ss.SSSS')) \
  .show(truncate=False)
  

#SQL string to TimestampType
spark.sql("select to_timestamp('2019-06-24 12:01:19.000') as timestamp")
#SQL CAST timestamp string to TimestampType
spark.sql("select timestamp('2019-06-24 12:01:19.000') as timestamp")
#SQL Custom string to TimestampType
spark.sql("select to_timestamp('06-24-2019 12:01:19.000','MM-dd-yyyy HH:mm:ss.SSSS') as timestamp")

root
 |-- id: string (nullable = true)
 |-- input_timestamp: string (nullable = true)

+---+-----------------------+-------------------+
|id |input_timestamp        |timestamp          |
+---+-----------------------+-------------------+
|1  |2019-06-24 12:01:19.000|2019-06-24 12:01:19|
+---+-----------------------+-------------------+

+---+-----------------------+-------------------+
|id |input_timestamp        |timestamp          |
+---+-----------------------+-------------------+
|1  |2019-06-24 12:01:19.000|2019-06-24 12:01:19|
+---+-----------------------+-------------------+

+---------------------------------------------------------------+
|to_timestamp(06-24-2019 12:01:19.000, MM-dd-yyyy HH:mm:ss.SSSS)|
+---------------------------------------------------------------+
|2019-06-24 12:01:19                                            |
+---------------------------------------------------------------+

Out[1]: DataFrame[timestamp: timestamp]

In [0]:
#The code starts by importing the necessary modules: SparkSession from pyspark.sql and all the functions from pyspark.sql.functions. These modules are required for creating a SparkSession and performing timestamp conversions.

#A SparkSession is created using the SparkSession.builder API. The appName parameter sets the name of the Spark application. If an existing SparkSession with the same name exists, it will be retrieved; otherwise, a new SparkSession will be created.

#A DataFrame is created with a single row containing two columns: "id" and "input_timestamp". The "input_timestamp" column contains a timestamp string in the format "yyyy-MM-dd HH:mm:ss.SSS".

#The schema of the DataFrame is printed using the printSchema() function.

#The to_timestamp() function is used to convert the "input_timestamp" column to a TimestampType. The resulting column is named "timestamp". The DataFrame is displayed using the show() function.

#Another way to convert the "input_timestamp" column to a TimestampType is by using the cast() function. First, the to_timestamp() function is applied to convert the string to a TimestampType, and then the cast() function is used to convert it back to a string. The resulting column is named "timestamp". The DataFrame is displayed using the show() function.

#The to_timestamp() function is used in conjunction with the lit() function to convert a hardcoded timestamp string ("06-24-2019 12:01:19.000") to a TimestampType. The resulting column is named "timestamp". The DataFrame is displayed using the show() function.

#The code also demonstrates the same timestamp conversions using Spark SQL queries. Three separate queries are executed: one using the to_timestamp() function with a hardcoded timestamp string, one using the timestamp() function with a hardcoded timestamp string, and one using the to_timestamp() function with a custom timestamp string format ("MM-dd-yyyy HH:mm:ss.SSSS"). These queries return a DataFrame with a single column named "timestamp".

#These code snippets showcase different methods to convert timestamp strings to TimestampType in PySpark, providing flexibility in handling timestamp data within Spark DataFrames.
