In [None]:
import os



from pyspark.sql import SparkSession
from pyspark.sql.functions import *

# Start (or reuse) the Spark session that the rest of the notebook runs against.
spark = (
    SparkSession.builder
    .appName("DateTimeFunctions")
    .getOrCreate()
)

# Sample rows: (name, join date written as a "yyyy-MM-dd" string).
data = [
    ("Manas", "2024-01-15"),
    ("Priya", "2023-12-10"),
    ("Rahul", "2022-08-05"),
]

# Both columns come from Python strings, so JoinDate is not a date type yet.
df = spark.createDataFrame(data, schema=["Name", "JoinDate"])


In [None]:


# Convert string to date.
# No format argument is given, so to_date() relies on Spark's default parsing,
# which accepts "yyyy-MM-dd" strings like the ones in the sample data.
df = df.withColumn("JoinDate", to_date("JoinDate"))


# CURRENT_DATE()
# The value is the same for every row of df: it is the date the query runs.
df.select(current_date()).show()

# Output (example - depends on the day the cell is executed):
# 2026-02-24


# CURRENT_TIMESTAMP()
# truncate=False: by default show() cuts cell values longer than 20 characters,
# which would clip the timestamp to something like "2026-02-24 16:45:...".
df.select(current_timestamp()).show(truncate=False)

# Output (example - depends on when the cell is executed):
# 2026-02-24 16:45:30.123456



In [None]:

# DATE_ADD()
# Moves each JoinDate 10 days forward (date_sub is the counterpart that
# moves it backward).
df.select(date_add("JoinDate", 10)).show()

# Output:
# +----------------------+
# |date_add(JoinDate, 10)|
# +----------------------+
# |            2024-01-25|
# |            2023-12-20|
# |            2022-08-15|
# +----------------------+


# DATEDIFF()
# datediff(end, start) returns the number of days from start to end,
# so this is the number of days between JoinDate and today.
df.select(datediff(current_date(), "JoinDate")).show()

# Output:
# Days difference from today (changes with the day the cell is executed)


+----------------------+
|date_sub(JoinDate, 10)|
+----------------------+
|            2024-01-05|
|            2023-11-30|
|            2022-07-26|
+----------------------+

+----------------------------------+
|datediff(current_date(), JoinDate)|
+----------------------------------+
|                               773|
|                               809|
|                              1301|
+----------------------------------+



DataFrame[datediff(current_date(), JoinDate): int]

In [None]:


# One Column handle shared by the three extractors below.
join_date = df["JoinDate"]


# YEAR() - year component of JoinDate
df.select(year(join_date)).show()

# Output:
# 2024
# 2023
# 2022


# MONTH() - month number of JoinDate
df.select(month(join_date)).show()

# Output:
# 1
# 12
# 8


# DAY() - day of the month of JoinDate
df.select(dayofmonth(join_date)).show()

# Output:
# 15
# 10
# 5


In [None]:


# TO_DATE()
# Shows JoinDate as a date column.
df.select(to_date("JoinDate")).show()

# Output:
# Converted date format

# Pattern letters accepted by date_format():
#
# | Pattern | Meaning                    |
# | ------- | -------------------------- |
# | yyyy    | 4-digit year               |
# | yy      | 2-digit year               |
# | MM      | Month (02)                 |
# | MMM     | Month name short (Feb)     |
# | MMMM    | Full month name (February) |
# | dd      | Day (20)                   |
# | E       | Day name short (Tue)       |
# | EEEE    | Full day name              |

# DATE_FORMAT()
# Render JoinDate once per pattern; each pass prints its own table.
for pattern in ("dd-MM-yyyy", "dd MMM yy"):
    df.select(date_format("JoinDate", pattern)).show()

# Output for "dd-MM-yyyy":
# 15-01-2024
# 10-12-2023
# 05-08-2022
#
# Output for "dd MMM yy":
# 15 Jan 24
# 10 Dec 23
# 05 Aug 22



+-----------------+
|to_date(JoinDate)|
+-----------------+
|       2024-01-15|
|       2023-12-10|
|       2022-08-05|
+-----------------+

+---------------------------------+
|date_format(JoinDate, dd-MM-yyyy)|
+---------------------------------+
|                       15-01-2024|
|                       10-12-2023|
|                       05-08-2022|
+---------------------------------+

+--------------------------------+
|date_format(JoinDate, dd MMM yy)|
+--------------------------------+
|                       15 Jan 24|
|                       10 Dec 23|
|                       05 Aug 22|
+--------------------------------+



In [None]:
# Important Time Patterns

# | Pattern | Meaning               |
# | ------- | --------------------- |
# | HH      | Hour, 24-hour (00-23) |
# | hh      | Hour, 12-hour (01-12) |
# | mm      | Minutes               |
# | ss      | Seconds               |
# | a       | AM / PM               |