In [0]:
from pyspark.sql.functions import * 
from pyspark.sql.types import *

In [0]:
# One-row DataFrame holding the current date ('today') and the current
# timestamp ('now'); the cells below select from it.
date_df = (
    spark.range(1)
    .withColumn('today', current_date())
    .withColumn('now', current_timestamp())
)
date_df.show()

+---+----------+--------------------+
| id|     today|                 now|
+---+----------+--------------------+
|  0|2023-07-29|2023-07-29 15:57:...|
+---+----------+--------------------+



In [0]:
# Shift the current date five days forward and five days backward.
date_df.select(
    'today',
    date_add('today', 5),
    date_sub('today', 5),
).show()

+----------+------------------+------------------+
|     today|date_add(today, 5)|date_sub(today, 5)|
+----------+------------------+------------------+
|2023-07-23|        2023-07-28|        2023-07-18|
+----------+------------------+------------------+



In [0]:
# Difference in days between today and the date seven days earlier.
(
    date_df
    .withColumn('week ago', date_sub('today', 7))
    .select(datediff('today', 'week ago'))
    .show()
)

+-------------------------+
|datediff(today, week ago)|
+-------------------------+
|                        7|
+-------------------------+



In [0]:
# Rounded difference in months between two fixed dates.
#
# The two dates are added as real columns first. The previous version referred
# to "Start date" / "End date" inside the same select() that declared those
# aliases, which only resolves on runtimes that support lateral column aliases
# and raises an AnalysisException on older Spark releases.
#
# months_between(date1, date2) is positive when date1 is later than date2, so
# passing the start date first yields a negative number of months here.
# `round` is the Spark SQL function brought in by the wildcard import, not the
# Python builtin.
(
    date_df
    .withColumn("Start date", to_date(lit('2023-01-04')))
    .withColumn("End date", to_date(lit('2023-07-25')))
    .select(
        col("Start date"),
        col("End date"),
        round(months_between(col("Start date"), col("End date"))).alias(" diff in months"),
    )
    .show()
)

+----------+----------+---------------+
|Start date|  End date| diff in months|
+----------+----------+---------------+
|2023-01-04|2023-07-25|           -7.0|
+----------+----------+---------------+



In [0]:
# On Databricks, a string whose format is wrong is treated as NULL by to_date,
# while a value that is already a valid date passes through unchanged.
date_df.select(
    to_date(lit('2023-22-05')).alias("wrong date format"),
    to_date('today').alias("Date correct format"),
).show()

+-----------------+-------------------+
|wrong date format|Date correct format|
+-----------------+-------------------+
|             null|         2023-07-23|
+-----------------+-------------------+



In [0]:
# The string is meant to be 4 May (year-day-month), but without an explicit
# pattern it is displayed as 5 April.
date_df.select(
    to_date(lit('2023-04-05')).alias("wrong date format")
).show()

+-----------------+
|wrong date format|
+-----------------+
|       2023-04-05|
+-----------------+



In [0]:
# Supplying the pattern explicitly makes '2023-04-05' parse as year-day-month,
# for both the date and the timestamp conversion.
dateFormat = 'yyyy-dd-MM'
date_df.select(
    to_date(lit('2023-04-05'), dateFormat).alias("right date "),
    to_timestamp(lit('2023-04-05'), dateFormat).alias("Risht timestamp"),
).show()

+-----------+-------------------+
|right date |    Risht timestamp|
+-----------+-------------------+
| 2023-05-04|2023-05-04 00:00:00|
+-----------+-------------------+



In [0]:
# to_date converts a string to a date. The first argument is the date string
# and the second is the pattern that string is written in. The resulting date
# is shown in the default date format, e.g. yyyy-MM-dd.
date_df.select(
    to_date(lit('29-07-2023'), 'dd-MM-yyyy').alias("Diff date format")
).show()

+----------------+
|Diff date format|
+----------------+
|      2023-07-29|
+----------------+



In [0]:
# date_format converts a date back to a string in the requested pattern
# (here dd/MM/yyyy), after the input string is parsed with to_date.
date_df.select(
    date_format(
        to_date(lit('29-07-2023'), 'dd-MM-yyyy'),
        'dd/MM/yyyy',
    ).alias("Date Formated")
).show()

+-------------+
|Date Formated|
+-------------+
|   29/07/2023|
+-------------+



In [0]:
# Same date, formatted with the compact pattern yyyyMMdd.
date_df.select(
    date_format(
        to_date(lit('29-07-2023'), 'dd-MM-yyyy'),
        'yyyyMMdd',
    ).alias("Date Formated")
).show()

+-------------+
|Date Formated|
+-------------+
|     20230729|
+-------------+



In [0]:
# Same date, formatted with the pattern 'D' and labelled as its day-of-year.
date_df.select(
    date_format(
        to_date(lit('29-07-2023'), 'dd-MM-yyyy'),
        'D',
    ).alias("day-of-year")
).show()

+-----------+
|day-of-year|
+-----------+
|        210|
+-----------+

