In [65]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import current_date, date_format, to_date, lit, date_diff,\
months_between, month, year, add_months, date_add, day, weekofyear, weekday
from pyspark.sql.types import *

# Create (or reuse) the SparkSession for this notebook.
# `SparkSession.builder` is the documented entry point for configuring a session;
# instantiating the nested `Builder` class directly is not part of the public usage pattern.
spark = SparkSession.builder.appName('Date Function in PySpark').getOrCreate()

In [24]:
# Base DataFrame: a single `id` column holding 0, 1 and 2 (start inclusive, end exclusive).
df = spark.range(0, 3)
df.show()

+---+
| id|
+---+
|  0|
|  1|
|  2|
+---+



#### current_date() --> to get the current system date

In [25]:
# Attach today's date to every row; the schema printed below reports it as a `date` column.
df = df.withColumn(colName='todaysDate', col=current_date())

# Show the rows first, then the column types.
df.show()
df.printSchema()

+---+----------+
| id|todaysDate|
+---+----------+
|  0|2024-06-18|
|  1|2024-06-18|
|  2|2024-06-18|
+---+----------+

root
 |-- id: long (nullable = false)
 |-- todaysDate: date (nullable = false)



#### date_format() --> to convert a date into a string in the specified format

In [26]:
# Render two dates as 'MM.dd.yyyy' strings:
#   newFormatDate       - from the literal '2020-08-11'
#   newFormatTodaysDate - from the existing todaysDate column
# date_format() produces string columns, as the printed schema shows.
df = (
    df
    .withColumn('newFormatDate', date_format(lit('2020-08-11'), 'MM.dd.yyyy'))
    .withColumn('newFormatTodaysDate', date_format(df['todaysDate'], 'MM.dd.yyyy'))
)
df.show()
df.printSchema()

+---+----------+-------------+-------------------+
| id|todaysDate|newFormatDate|newFormatTodaysDate|
+---+----------+-------------+-------------------+
|  0|2024-06-18|   08.11.2020|         06.18.2024|
|  1|2024-06-18|   08.11.2020|         06.18.2024|
|  2|2024-06-18|   08.11.2020|         06.18.2024|
+---+----------+-------------+-------------------+

root
 |-- id: long (nullable = false)
 |-- todaysDate: date (nullable = false)
 |-- newFormatDate: string (nullable = true)
 |-- newFormatTodaysDate: string (nullable = false)



#### to_date() --> to convert a date string into a value of date type

In [34]:
# Parse the 'MM.dd.yyyy' strings in newFormatTodaysDate back into a date column.
df = df.withColumn(
    'dateInDateFormat',
    to_date(df['newFormatTodaysDate'], format='MM.dd.yyyy'),
)
df.show()
df.printSchema()

+---+----------+-------------+-------------------+----------------+
| id|todaysDate|newFormatDate|newFormatTodaysDate|dateInDateFormat|
+---+----------+-------------+-------------------+----------------+
|  0|2024-06-18|   08.11.2020|         06.18.2024|      2024-06-18|
|  1|2024-06-18|   08.11.2020|         06.18.2024|      2024-06-18|
|  2|2024-06-18|   08.11.2020|         06.18.2024|      2024-06-18|
+---+----------+-------------+-------------------+----------------+

root
 |-- id: long (nullable = false)
 |-- todaysDate: date (nullable = false)
 |-- newFormatDate: string (nullable = true)
 |-- newFormatTodaysDate: string (nullable = false)
 |-- dateInDateFormat: date (nullable = true)



In [44]:
# One-row sample with two dates given as plain strings.
# Only column names are supplied as the schema, so both columns are typed as
# string (see the printSchema output).
data = [('2023-09-11', '2023-11-26')]
schema = ['d1', 'd2']

df = spark.createDataFrame(data=data, schema=schema)
df.show()
df.printSchema()

+----------+----------+
|        d1|        d2|
+----------+----------+
|2023-09-11|2023-11-26|
+----------+----------+

root
 |-- d1: string (nullable = true)
 |-- d2: string (nullable = true)



In [60]:
# Each pair is (new column name, expression that fills it). The pairs are
# displayed one at a time, in this order, each as its own small table.
# d1 and d2 are string columns here; the functions are applied to them directly.
date_examples = [
    # date_diff(): number of days from d1 to d2
    ('dateDiff', date_diff(df.d2, df.d1)),
    # months_between(): fractional number of months from d1 to d2
    ('monthsBetween', months_between(df.d2, df.d1)),
    # add_months(): shift d2 forward / backward by whole months
    ('addMonths', add_months(df.d2, 4)),
    ('subMonths', add_months(df.d2, -4)),
    # date_add(): shift d2 forward / backward by days
    ('addDate', date_add(df.d2, 4)),
    ('subDate', date_add(df.d2, -4)),
    # year(), month(), day(): extract the individual parts of d2
    ('year', year(df.d2)),
    ('month', month(df.d2)),
    ('day', day(df.d2)),
]

for column_name, expression in date_examples:
    df.withColumn(column_name, expression).show()

+----------+----------+--------+
|        d1|        d2|dateDiff|
+----------+----------+--------+
|2023-09-11|2023-11-26|      76|
+----------+----------+--------+

+----------+----------+-------------+
|        d1|        d2|monthsBetween|
+----------+----------+-------------+
|2023-09-11|2023-11-26|   2.48387097|
+----------+----------+-------------+

+----------+----------+----------+
|        d1|        d2| addMonths|
+----------+----------+----------+
|2023-09-11|2023-11-26|2024-03-26|
+----------+----------+----------+

+----------+----------+----------+
|        d1|        d2| subMonths|
+----------+----------+----------+
|2023-09-11|2023-11-26|2023-07-26|
+----------+----------+----------+

+----------+----------+----------+
|        d1|        d2|   addDate|
+----------+----------+----------+
|2023-09-11|2023-11-26|2023-11-30|
+----------+----------+----------+

+----------+----------+----------+
|        d1|        d2|   subDate|
+----------+----------+----------+
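|2023-09-11|2023-11-26|2023-11-22|
+----------+----------+----------+

+----------+----------+----+
|        d1|        d2|year|
+----------+----------+----+
|2023-09-11|2023-11-26|2023|
+----------+----------+----+

+----------+----------+-----+
|        d1|        d2|month|
+----------+----------+-----+
|2023-09-11|2023-11-26|   11|
+----------+----------+-----+

+----------+----------+---+
|        d1|        d2|day|
+----------+----------+---+
|2023-09-11|2023-11-26| 26|
+----------+----------+---+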

In [64]:
# Extract calendar parts of d2, one table per function:
#   day()        - day of the month
#   weekday()    - day of the week, 0 = Monday ... 6 = Sunday
#   weekofyear() - week number within the year
part_extractors = (
    ('day', day),
    ('weekday', weekday),
    ('weekOfYear', weekofyear),
)

for column_name, extractor in part_extractors:
    df.withColumn(column_name, extractor(df.d2)).show()

+----------+----------+---+
|        d1|        d2|day|
+----------+----------+---+
|2023-09-11|2023-11-26| 26|
+----------+----------+---+

+----------+----------+-------+
|        d1|        d2|weekday|
+----------+----------+-------+
|2023-09-11|2023-11-26|      6|
+----------+----------+-------+

+----------+----------+----------+
|        d1|        d2|weekOfYear|
+----------+----------+----------+
|2023-09-11|2023-11-26|        47|
+----------+----------+----------+



In [66]:
# Stop the SparkSession that was created at the top of the notebook.
spark.stop()