#### Setting environment variables

In [0]:
import os
import sys
os.environ["PYSPARK_PYTHON"] = sys.executable
os.environ["PYSPARK_DRIVER_PYTHON"] = sys.executable

#### Create Spark Session

In [0]:

from pyspark.sql import SparkSession
spark = SparkSession.builder\
        .appName("DateAndTime")\
        .getOrCreate()

In [0]:
df=spark.createDataFrame(
    data=[("1","2019-06-24 12:01:19.000")],
    schema=["id","input_timestamp"]
)
df.printSchema()

root
 |-- id: string (nullable = true)
 |-- input_timestamp: string (nullable = true)



#### Casting to TimeStamp

##### to_timestamp() for casting

In [0]:
from pyspark.sql.functions import to_timestamp
df.select("id",to_timestamp("input_timestamp").alias("timestamp")).printSchema()

root
 |-- id: string (nullable = true)
 |-- timestamp: timestamp (nullable = true)



In [0]:
df2=df.withColumn("timestamp",to_timestamp("input_timestamp"))
df2.printSchema()
df2.show()

root
 |-- id: string (nullable = true)
 |-- input_timestamp: string (nullable = true)
 |-- timestamp: timestamp (nullable = true)

+---+--------------------+-------------------+
| id|     input_timestamp|          timestamp|
+---+--------------------+-------------------+
|  1|2019-06-24 12:01:...|2019-06-24 12:01:19|
+---+--------------------+-------------------+



In [0]:
from pyspark.sql.functions import col
df2.select(col("id"),col("timestamp"),col("timestamp").cast("string")).printSchema()

root
 |-- id: string (nullable = true)
 |-- timestamp: timestamp (nullable = true)
 |-- timestamp: string (nullable = true)



In [0]:
from pyspark.sql.functions import current_timestamp,date_format
df2.withColumn("current",date_format(current_timestamp(),"dd-MM-yyyy HH:mm:ss")).show()

+---+--------------------+-------------------+-------------------+
| id|     input_timestamp|          timestamp|            current|
+---+--------------------+-------------------+-------------------+
|  1|2019-06-24 12:01:...|2019-06-24 12:01:19|09-03-2024 06:48:34|
+---+--------------------+-------------------+-------------------+



#### Using SQL Example

In [0]:
df.createOrReplaceTempView("sample_time")
spark.sql("select int(id), to_timestamp(input_timestamp) as time_stamp, date_format(current_timestamp(), 'dd/MM/yyyy HH:mm:ss') as create_time from sample_time").show()

+---+-------------------+-------------------+
| id|         time_stamp|        create_time|
+---+-------------------+-------------------+
|  1|2019-06-24 12:01:19|09/03/2024 07:27:30|
+---+-------------------+-------------------+



In [0]:
spark.sql("select to_timestamp('06-24-2019 12:01:19.000','MM-dd-yyyy HH:mm:ss.SSSS') as timestamp").show()

+-------------------+
|          timestamp|
+-------------------+
|2019-06-24 12:01:19|
+-------------------+



#### to_date() function

In [0]:

df=spark.createDataFrame(
        data = [ ("1","2019-06-24 12:01:19.000")],
        schema=["id","input_timestamp"])
df.printSchema()

root
 |-- id: string (nullable = true)
 |-- input_timestamp: string (nullable = true)



In [0]:
from pyspark.sql.functions import to_date
df.withColumn("date",to_date('input_timestamp')).show()

+---+--------------------+----------+
| id|     input_timestamp|      date|
+---+--------------------+----------+
|  1|2019-06-24 12:01:...|2019-06-24|
+---+--------------------+----------+



In [0]:
df.withColumn("date", to_date("input_timestamp",'yyyy-MM-dd HH:mm:ss.SSSS')).show()

+---+--------------------+----------+
| id|     input_timestamp|      date|
+---+--------------------+----------+
|  1|2019-06-24 12:01:...|2019-06-24|
+---+--------------------+----------+



In [0]:
from pyspark.sql.functions import col
df.withColumn("ts",to_timestamp(col("input_timestamp")))\
    .withColumn("date",to_date(col("ts"))).show()

+---+--------------------+-------------------+----------+
| id|     input_timestamp|                 ts|      date|
+---+--------------------+-------------------+----------+
|  1|2019-06-24 12:01:...|2019-06-24 12:01:19|2019-06-24|
+---+--------------------+-------------------+----------+



In [0]:
df.withColumn("date",col("input_timestamp").cast("date")).show()

+---+--------------------+----------+
| id|     input_timestamp|      date|
+---+--------------------+----------+
|  1|2019-06-24 12:01:...|2019-06-24|
+---+--------------------+----------+



In [0]:
df.printSchema()

root
 |-- id: string (nullable = true)
 |-- input_timestamp: string (nullable = true)



In [0]:
df.withColumn('date', to_timestamp('input_timestamp').cast('date'))\
    .show()

+---+--------------------+----------+
| id|     input_timestamp|      date|
+---+--------------------+----------+
|  1|2019-06-24 12:01:...|2019-06-24|
+---+--------------------+----------+



#### spark sql

In [0]:
spark.sql("select date('2019-06-24 12:01:19.000') as date").show()

+----------+
|      date|
+----------+
|2019-06-24|
+----------+



In [0]:
spark.sql("select date(to_timestamp('2019-06-24 12:01:19.000')) as date_type").show()

+----------+
| date_type|
+----------+
|2019-06-24|
+----------+



In [0]:
spark.sql("select to_date('06-24-2019 12:01:19.000','MM-dd-yyyy HH:mm:ss.SSSS') as date_type").show()


+----------+
| date_type|
+----------+
|2019-06-24|
+----------+



#### date_format() function

In [0]:
df=spark.createDataFrame([["1"]],["id"])

#### various formats

In [0]:
from pyspark.sql.functions import current_date
df.select(current_date().alias("current_date"),
         date_format(current_timestamp(), "yyyy-MM-dd").alias("yyyy-MM-dd"),
         date_format(current_timestamp(), "yyyy/MM/dd HH:mm").alias("yyyy/MM/dd HH:mm"),
         date_format(current_timestamp(), "yyyy MMM dd").alias("yyyy MMM dd"),
         date_format(current_timestamp(), "dd MMMM yyyy").alias("dd MMMM yyyy"),
         date_format(current_timestamp(), "dd-MM-yyyy hh:mm:ss a").alias("am/pm"),
         date_format(current_timestamp(), "HH:mm EEEE").alias("HH:mm day"))\
        .show(truncate = False)

+------------+----------+----------------+-----------+-------------+----------------------+--------------+
|current_date|yyyy-MM-dd|yyyy/MM/dd HH:mm|yyyy MMM dd|dd MMMM yyyy |am/pm                 |HH:mm day     |
+------------+----------+----------------+-----------+-------------+----------------------+--------------+
|2024-03-09  |2024-03-09|2024/03/09 07:51|2024 Mar 09|09 March 2024|09-03-2024 07:51:56 AM|07:51 Saturday|
+------------+----------+----------------+-----------+-------------+----------------------+--------------+



#### datediff() function

In [0]:
data = [("1","2019-07-01"),("2","2019-06-24"),("3","2019-08-24")]
df=spark.createDataFrame(data=data,schema=["id","date"])

In [0]:
from pyspark.sql.functions import datediff
df.select(col("date"),
         current_date().alias("today"),
         datediff(current_date(), col("date")))\
        .show()

+----------+----------+------------------------------+
|      date|     today|datediff(current_date(), date)|
+----------+----------+------------------------------+
|2019-07-01|2024-03-09|                          1713|
|2019-06-24|2024-03-09|                          1720|
|2019-08-24|2024-03-09|                          1659|
+----------+----------+------------------------------+



#### months_between() function

In [0]:
from pyspark.sql.functions import round,lit
from pyspark.sql.functions import months_between
df.withColumn("months_between", months_between(current_date(), col("date")))\
    .withColumn("months_between_rounded", round(months_between(current_date(), col("date")), 2))\
    .withColumn("years_between", round(months_between(current_date(), col("date"))/ lit('12'), 2))\
    .show()

+---+----------+--------------+----------------------+-------------+
| id|      date|months_between|months_between_rounded|years_between|
+---+----------+--------------+----------------------+-------------+
|  1|2019-07-01|   56.25806452|                 56.26|         4.69|
|  2|2019-06-24|   56.51612903|                 56.52|         4.71|
|  3|2019-08-24|   54.51612903|                 54.52|         4.54|
+---+----------+--------------+----------------------+-------------+



In [0]:
df.show()

+---+----------+
| id|      date|
+---+----------+
|  1|2019-07-01|
|  2|2019-06-24|
|  3|2019-08-24|
+---+----------+

