In [133]:
import polars as pl
import datetime
from warnings import filterwarnings

filterwarnings('ignore')

In [134]:
business_lf = pl.scan_parquet(r"C:\Users\Rudra\Desktop\yelp\parquet-data\business.parquet")
checkin_lf = pl.scan_parquet(r"C:\Users\Rudra\Desktop\yelp\parquet-data\checkin.parquet")
review_lf = pl.scan_parquet(r"C:\Users\Rudra\Desktop\yelp\parquet-data\review.parquet")
tip_lf = pl.scan_parquet(r"C:\Users\Rudra\Desktop\yelp\parquet-data\tip.parquet")
user_lf = pl.scan_parquet(r"C:\Users\Rudra\Desktop\yelp\parquet-data\yelp_user.parquet")

In [135]:
user_lf.collect_schema()

Schema([('user_id', String),
        ('name', String),
        ('review_count', Int64),
        ('yelping_since', String),
        ('useful', Int64),
        ('funny', Int64),
        ('cool', Int64),
        ('elite', String),
        ('friends', String),
        ('fans', Int64),
        ('average_stars', Float64),
        ('compliment_hot', Int64),
        ('compliment_more', Int64),
        ('compliment_profile', Int64),
        ('compliment_cute', Int64),
        ('compliment_list', Int64),
        ('compliment_note', Int64),
        ('compliment_plain', Int64),
        ('compliment_cool', Int64),
        ('compliment_funny', Int64),
        ('compliment_writer', Int64),
        ('compliment_photos', Int64)])

In [136]:
user_lf.select(['yelping_since']).collect()

yelping_since
str
"""2007-01-25 16:47:26"""
"""2009-01-25 04:35:42"""
"""2008-07-25 10:41:00"""
"""2005-11-29 04:38:33"""
"""2007-01-05 19:40:59"""
…
"""2015-01-06 00:31:31"""
"""2016-06-14 07:20:52"""
"""2017-02-04 15:31:58"""
"""2011-01-14 00:29:08"""


# here is code

user_lf = pl.DataFrame({
    "user_id": ["u1", "u2", "u3"],
    "yelping_since": [
        "2007-01-25 16:47:26",
        "2009-01-25 04:35:42",
        "2020-12-19 02:32:39"
    ]
}).lazy()


# <strong style="color:#5e17eb"> 1. Creating & Parsing Date/Time Columns </strong>


## <strong style="color:#5e17eb"> Parse from String </strong>


In [137]:
user_lf.head(1).collect()

user_id,name,review_count,yelping_since,useful,funny,cool,elite,friends,fans,average_stars,compliment_hot,compliment_more,compliment_profile,compliment_cute,compliment_list,compliment_note,compliment_plain,compliment_cool,compliment_funny,compliment_writer,compliment_photos
str,str,i64,str,i64,i64,i64,str,str,i64,f64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64
"""qVc8ODYU5SZjKXVBgXdI7w""","""Walker""",585,"""2007-01-25 16:47:26""",7217,1259,5994,"""2007""","""NSCy54eWehBJyZdG2iE84w, pe42u7…",267,3.91,250,65,55,56,18,232,844,467,467,239,180


%Y = year, %m = month, %d = day, %H = hour, %M = minute, %S = seconds.



In [138]:
user_lf.with_columns(
    pl.col("yelping_since") \
        .str.strptime(pl.Datetime, "%Y-%m-%d %H:%M:%S")
        .alias("yelping_since_dt")
).select(['yelping_since',	'yelping_since_dt']).collect()

yelping_since,yelping_since_dt
str,datetime[μs]
"""2007-01-25 16:47:26""",2007-01-25 16:47:26
"""2009-01-25 04:35:42""",2009-01-25 04:35:42
"""2008-07-25 10:41:00""",2008-07-25 10:41:00
"""2005-11-29 04:38:33""",2005-11-29 04:38:33
"""2007-01-05 19:40:59""",2007-01-05 19:40:59
…,…
"""2015-01-06 00:31:31""",2015-01-06 00:31:31
"""2016-06-14 07:20:52""",2016-06-14 07:20:52
"""2017-02-04 15:31:58""",2017-02-04 15:31:58
"""2011-01-14 00:29:08""",2011-01-14 00:29:08


## <strong style="color:#5e17eb">Auto-parse  </strong>


In [139]:
user_lf.with_columns(
    pl.col("yelping_since") 
        .str.to_datetime()
        .alias("yelping_since_dt")
).select(['yelping_since',	'yelping_since_dt']).collect()

yelping_since,yelping_since_dt
str,datetime[μs]
"""2007-01-25 16:47:26""",2007-01-25 16:47:26
"""2009-01-25 04:35:42""",2009-01-25 04:35:42
"""2008-07-25 10:41:00""",2008-07-25 10:41:00
"""2005-11-29 04:38:33""",2005-11-29 04:38:33
"""2007-01-05 19:40:59""",2007-01-05 19:40:59
…,…
"""2015-01-06 00:31:31""",2015-01-06 00:31:31
"""2016-06-14 07:20:52""",2016-06-14 07:20:52
"""2017-02-04 15:31:58""",2017-02-04 15:31:58
"""2011-01-14 00:29:08""",2011-01-14 00:29:08


## <strong style="color:#5e17eb"> From Timestamps </strong>


In [140]:
pl.DataFrame({
    "custom_date": [pl.date(2025, 8, 5)]
})


custom_date
object
"2025-08-05 00:00:00.alias(""datetime"").strict_cast(Date).alias(""date"")"


## <strong style="color:#5e17eb">Create from Components  </strong>


In [141]:
user_lf.with_columns(
    pl.col("yelping_since").str.to_datetime().alias("yelping_since_dt")
).with_columns([
    pl.col("yelping_since_dt").dt.year().alias("year_joined"),
    pl.col("yelping_since_dt").dt.month().alias("month_joined"),
    pl.col("yelping_since_dt").dt.day().alias("days_joined"),
    (pl.lit("2025-08-06").str.to_datetime() - pl.col("yelping_since_dt")).alias("membership_duration")
]).collect()

user_id,name,review_count,yelping_since,useful,funny,cool,elite,friends,fans,average_stars,compliment_hot,compliment_more,compliment_profile,compliment_cute,compliment_list,compliment_note,compliment_plain,compliment_cool,compliment_funny,compliment_writer,compliment_photos,yelping_since_dt,year_joined,month_joined,days_joined,membership_duration
str,str,i64,str,i64,i64,i64,str,str,i64,f64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,datetime[μs],i32,i8,i8,duration[μs]
"""qVc8ODYU5SZjKXVBgXdI7w""","""Walker""",585,"""2007-01-25 16:47:26""",7217,1259,5994,"""2007""","""NSCy54eWehBJyZdG2iE84w, pe42u7…",267,3.91,250,65,55,56,18,232,844,467,467,239,180,2007-01-25 16:47:26,2007,1,25,6767d 7h 12m 34s
"""j14WgRoU_-2ZE1aw1dXrJg""","""Daniel""",4333,"""2009-01-25 04:35:42""",43091,13066,27281,"""2009,2010,2011,2012,2013,2014,…","""ueRPE0CX75ePGMqOFVj6IQ, 52oH4D…",3138,3.74,1145,264,184,157,251,1847,7054,3131,3131,1521,1946,2009-01-25 04:35:42,2009,1,25,6036d 19h 24m 18s
"""2WnXYQFK0hXEoTxPtV2zvg""","""Steph""",665,"""2008-07-25 10:41:00""",2086,1010,1003,"""2009,2010,2011,2012,2013""","""LuO3Bn4f3rlhyHIaNfTlnA, j9B4Xd…",52,3.32,89,13,10,17,3,66,96,119,119,35,18,2008-07-25 10:41:00,2008,7,25,6220d 13h 19m
"""SZDeASXq7o05mMNLshsdIA""","""Gwen""",224,"""2005-11-29 04:38:33""",512,330,299,"""2009,2010,2011""","""enx1vVPnfdNUdPho6PH_wg, 4wOcvM…",28,4.27,24,4,1,6,2,12,16,26,26,10,9,2005-11-29 04:38:33,2005,11,29,7189d 19h 21m 27s
"""hA5lMy-EnncsH4JoR-hFGQ""","""Karen""",79,"""2007-01-05 19:40:59""",29,15,7,,"""PBK4q9KEEBHhFvSXCUirIw, 3FWPpM…",1,3.54,1,1,0,0,0,1,1,0,0,0,0,2007-01-05 19:40:59,2007,1,5,6787d 4h 19m 1s
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""fB3jbHi3m0L2KgGOxBv6uw""","""Jerrold""",23,"""2015-01-06 00:31:31""",7,0,0,,,0,4.92,0,0,0,0,0,0,0,0,0,0,0,2015-01-06 00:31:31,2015,1,6,3864d 23h 28m 29s
"""68czcr4BxJyMQ9cJBm6C7Q""","""Jane""",1,"""2016-06-14 07:20:52""",0,0,0,,,0,5.0,0,0,0,0,0,0,0,0,0,0,0,2016-06-14 07:20:52,2016,6,14,3339d 16h 39m 8s
"""1x3KMskYxOuJCjRz70xOqQ""","""Shomari""",4,"""2017-02-04 15:31:58""",1,1,0,,,0,2.0,0,0,0,0,0,0,0,0,0,0,0,2017-02-04 15:31:58,2017,2,4,3104d 8h 28m 2s
"""ulfGl4tdbrH05xKzh5lnog""","""Susanne""",2,"""2011-01-14 00:29:08""",0,0,0,,,0,3.0,0,0,0,0,0,0,0,0,0,0,0,2011-01-14 00:29:08,2011,1,14,5317d 23h 30m 52s


## <strong style="color:#5e17eb">  From Epoch Seconds</strong>


In [142]:
user_lf_parsed = user_lf.with_columns(
    pl.col("yelping_since").str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S").alias("yelping_dt")
)
user_lf_parsed.select(["yelping_since", "yelping_dt"]).collect().head()

yelping_since,yelping_dt
str,datetime[μs]
"""2007-01-25 16:47:26""",2007-01-25 16:47:26
"""2009-01-25 04:35:42""",2009-01-25 04:35:42
"""2008-07-25 10:41:00""",2008-07-25 10:41:00
"""2005-11-29 04:38:33""",2005-11-29 04:38:33
"""2007-01-05 19:40:59""",2007-01-05 19:40:59


In [143]:
user_lf = user_lf.with_columns(
    pl.col("yelping_since").str.to_datetime().alias("yelping_dt")
)["yelping_dt"].collect()


TypeError: LazyFrame is not subscriptable (aside from slicing)

Use `select()` or `filter()` instead.

In [None]:
user_lf_epoch = user_lf.with_columns(
    pl.col("yelping_since")
      .str.strptime(pl.Datetime, "%Y-%m-%d %H:%M:%S")
      .dt.epoch("d")
      .alias("epoch_days")
)
user_lf_epoch.collect()

user_id,name,review_count,yelping_since,useful,funny,cool,elite,friends,fans,average_stars,compliment_hot,compliment_more,compliment_profile,compliment_cute,compliment_list,compliment_note,compliment_plain,compliment_cool,compliment_funny,compliment_writer,compliment_photos,yelping_dt,epoch_days
str,str,i64,str,i64,i64,i64,str,str,i64,f64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,datetime[μs],i32
"""qVc8ODYU5SZjKXVBgXdI7w""","""Walker""",585,"""2007-01-25 16:47:26""",7217,1259,5994,"""2007""","""NSCy54eWehBJyZdG2iE84w, pe42u7…",267,3.91,250,65,55,56,18,232,844,467,467,239,180,2007-01-25 16:47:26,13538
"""j14WgRoU_-2ZE1aw1dXrJg""","""Daniel""",4333,"""2009-01-25 04:35:42""",43091,13066,27281,"""2009,2010,2011,2012,2013,2014,…","""ueRPE0CX75ePGMqOFVj6IQ, 52oH4D…",3138,3.74,1145,264,184,157,251,1847,7054,3131,3131,1521,1946,2009-01-25 04:35:42,14269
"""2WnXYQFK0hXEoTxPtV2zvg""","""Steph""",665,"""2008-07-25 10:41:00""",2086,1010,1003,"""2009,2010,2011,2012,2013""","""LuO3Bn4f3rlhyHIaNfTlnA, j9B4Xd…",52,3.32,89,13,10,17,3,66,96,119,119,35,18,2008-07-25 10:41:00,14085
"""SZDeASXq7o05mMNLshsdIA""","""Gwen""",224,"""2005-11-29 04:38:33""",512,330,299,"""2009,2010,2011""","""enx1vVPnfdNUdPho6PH_wg, 4wOcvM…",28,4.27,24,4,1,6,2,12,16,26,26,10,9,2005-11-29 04:38:33,13116
"""hA5lMy-EnncsH4JoR-hFGQ""","""Karen""",79,"""2007-01-05 19:40:59""",29,15,7,,"""PBK4q9KEEBHhFvSXCUirIw, 3FWPpM…",1,3.54,1,1,0,0,0,1,1,0,0,0,0,2007-01-05 19:40:59,13518
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""fB3jbHi3m0L2KgGOxBv6uw""","""Jerrold""",23,"""2015-01-06 00:31:31""",7,0,0,,,0,4.92,0,0,0,0,0,0,0,0,0,0,0,2015-01-06 00:31:31,16441
"""68czcr4BxJyMQ9cJBm6C7Q""","""Jane""",1,"""2016-06-14 07:20:52""",0,0,0,,,0,5.0,0,0,0,0,0,0,0,0,0,0,0,2016-06-14 07:20:52,16966
"""1x3KMskYxOuJCjRz70xOqQ""","""Shomari""",4,"""2017-02-04 15:31:58""",1,1,0,,,0,2.0,0,0,0,0,0,0,0,0,0,0,0,2017-02-04 15:31:58,17201
"""ulfGl4tdbrH05xKzh5lnog""","""Susanne""",2,"""2011-01-14 00:29:08""",0,0,0,,,0,3.0,0,0,0,0,0,0,0,0,0,0,0,2011-01-14 00:29:08,14988


In [None]:
user_lf.select([
    pl.col("yelping_since").str.to_datetime().dt.year().alias("year"),
    pl.col("yelping_since").str.to_datetime().dt.month().alias("month"),
    pl.col("yelping_since").str.to_datetime().dt.weekday().alias("weekday")
]).collect()


year,month,weekday
i32,i8,i8
2007,1,4
2009,1,7
2008,7,5
2005,11,2
2007,1,5
…,…,…
2015,1,2
2016,6,2
2017,2,6
2011,1,5


In [None]:
user_lf.with_columns(
    pl.col("yelping_since").str.to_datetime().alias("yelping_dt")
).collect()

user_id,name,review_count,yelping_since,useful,funny,cool,elite,friends,fans,average_stars,compliment_hot,compliment_more,compliment_profile,compliment_cute,compliment_list,compliment_note,compliment_plain,compliment_cool,compliment_funny,compliment_writer,compliment_photos,yelping_dt
str,str,i64,str,i64,i64,i64,str,str,i64,f64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,datetime[μs]
"""qVc8ODYU5SZjKXVBgXdI7w""","""Walker""",585,"""2007-01-25 16:47:26""",7217,1259,5994,"""2007""","""NSCy54eWehBJyZdG2iE84w, pe42u7…",267,3.91,250,65,55,56,18,232,844,467,467,239,180,2007-01-25 16:47:26
"""j14WgRoU_-2ZE1aw1dXrJg""","""Daniel""",4333,"""2009-01-25 04:35:42""",43091,13066,27281,"""2009,2010,2011,2012,2013,2014,…","""ueRPE0CX75ePGMqOFVj6IQ, 52oH4D…",3138,3.74,1145,264,184,157,251,1847,7054,3131,3131,1521,1946,2009-01-25 04:35:42
"""2WnXYQFK0hXEoTxPtV2zvg""","""Steph""",665,"""2008-07-25 10:41:00""",2086,1010,1003,"""2009,2010,2011,2012,2013""","""LuO3Bn4f3rlhyHIaNfTlnA, j9B4Xd…",52,3.32,89,13,10,17,3,66,96,119,119,35,18,2008-07-25 10:41:00
"""SZDeASXq7o05mMNLshsdIA""","""Gwen""",224,"""2005-11-29 04:38:33""",512,330,299,"""2009,2010,2011""","""enx1vVPnfdNUdPho6PH_wg, 4wOcvM…",28,4.27,24,4,1,6,2,12,16,26,26,10,9,2005-11-29 04:38:33
"""hA5lMy-EnncsH4JoR-hFGQ""","""Karen""",79,"""2007-01-05 19:40:59""",29,15,7,,"""PBK4q9KEEBHhFvSXCUirIw, 3FWPpM…",1,3.54,1,1,0,0,0,1,1,0,0,0,0,2007-01-05 19:40:59
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""fB3jbHi3m0L2KgGOxBv6uw""","""Jerrold""",23,"""2015-01-06 00:31:31""",7,0,0,,,0,4.92,0,0,0,0,0,0,0,0,0,0,0,2015-01-06 00:31:31
"""68czcr4BxJyMQ9cJBm6C7Q""","""Jane""",1,"""2016-06-14 07:20:52""",0,0,0,,,0,5.0,0,0,0,0,0,0,0,0,0,0,0,2016-06-14 07:20:52
"""1x3KMskYxOuJCjRz70xOqQ""","""Shomari""",4,"""2017-02-04 15:31:58""",1,1,0,,,0,2.0,0,0,0,0,0,0,0,0,0,0,0,2017-02-04 15:31:58
"""ulfGl4tdbrH05xKzh5lnog""","""Susanne""",2,"""2011-01-14 00:29:08""",0,0,0,,,0,3.0,0,0,0,0,0,0,0,0,0,0,0,2011-01-14 00:29:08


## <strong style="color:#5e17eb"> Summary </strong>

**Goal: Convert raw strings or numbers into Polars date/time types.**
| Sub-Topic                      | Function / Method                                        | Example                                                |
| ------------------------------ | -------------------------------------------------------- | ------------------------------------------------------ |
| **1.1 Parse from String**      | `.str.strptime(dtype, format=...)`                       | `pl.col("date_str").str.strptime(pl.Date, "%Y-%m-%d")` |
| **1.2 Auto-parse**             | `.str.to_date()`, `.str.to_datetime()`, `.str.to_time()` | `pl.col("date_str").str.to_datetime()`                 |
| **1.3 From Timestamps**        | `.cast(pl.Datetime)`                                     | `pl.col("timestamp").cast(pl.Datetime)`                |
| **1.4 Create from Components** | `pl.date(year, month, day)`                              | `pl.date(2025, 8, 5)`                                  |
| **1.5 From Epoch Seconds**     | `.dt.epoch(time_unit="s")`                               | `pl.Series([1620000000]).cast(pl.Datetime("s"))`       |



# <strong style="color:#5e17eb"> 2. Extracting Date/Time Parts </strong>


## <strong style="color:#5e17eb">Calender Parts  </strong>

In [None]:
user_lf.select([
    pl.col("yelping_dt").dt.year().alias("year"),
    pl.col("yelping_dt").dt.month().alias("month"),
    pl.col("yelping_dt").dt.day().alias("day")
]).collect()

year,month,day
i32,i8,i8
2007,1,25
2009,1,25
2008,7,25
2005,11,29
2007,1,5
…,…,…
2015,1,6
2016,6,14
2017,2,4
2011,1,14


## <strong style="color:#5e17eb">  Time Pats</strong>

In [None]:
calendar_parts = user_lf.with_columns([
    pl.col("yelping_dt").dt.year().alias("year"),
    pl.col("yelping_dt").dt.month().alias("month"),
    pl.col("yelping_dt").dt.day().alias("day")
])

calendar_parts.select(["yelping_since", "year", "month", "day"]).collect()


yelping_since,year,month,day
str,i32,i8,i8
"""2007-01-25 16:47:26""",2007,1,25
"""2009-01-25 04:35:42""",2009,1,25
"""2008-07-25 10:41:00""",2008,7,25
"""2005-11-29 04:38:33""",2005,11,29
"""2007-01-05 19:40:59""",2007,1,5
…,…,…,…
"""2015-01-06 00:31:31""",2015,1,6
"""2016-06-14 07:20:52""",2016,6,14
"""2017-02-04 15:31:58""",2017,2,4
"""2011-01-14 00:29:08""",2011,1,14


## <strong style="color:#5e17eb"> Week and day Info
 </strong>

In [None]:
time_parts = user_lf.with_columns([
    pl.col("yelping_dt").dt.hour().alias("hour"),
    pl.col("yelping_dt").dt.minute().alias("minute"),
    pl.col("yelping_dt").dt.second().alias("second"),
    pl.col("yelping_dt").dt.millisecond().alias("millisecond")
])

time_parts.select(["yelping_since", "hour", "minute", "second", "millisecond"]).collect().head()


yelping_since,hour,minute,second,millisecond
str,i8,i8,i8,i32
"""2007-01-25 16:47:26""",16,47,26,0
"""2009-01-25 04:35:42""",4,35,42,0
"""2008-07-25 10:41:00""",10,41,0,0
"""2005-11-29 04:38:33""",4,38,33,0
"""2007-01-05 19:40:59""",19,40,59,0


In [None]:
week_day_info = user_lf.with_columns([
    pl.col("yelping_dt").dt.week().alias("week"),
    pl.col("yelping_dt").dt.weekday().alias("weekday"),       # Monday=0
    pl.col("yelping_dt").dt.ordinal_day().alias("ordinal_day") # Day of year (1-366)
])

week_day_info.select(["yelping_since", "week", "weekday", "ordinal_day"]).collect().head()


yelping_since,week,weekday,ordinal_day
str,i8,i8,i16
"""2007-01-25 16:47:26""",4,4,25
"""2009-01-25 04:35:42""",4,7,25
"""2008-07-25 10:41:00""",30,5,207
"""2005-11-29 04:38:33""",48,2,333
"""2007-01-05 19:40:59""",1,5,5


## <strong style="color:#5e17eb"> Start/end Info  </strong>

In [None]:
start_end_period = user_lf.with_columns([
    # Start of month/year
    pl.col("yelping_dt").dt.truncate("1mo").alias("start_of_month"),
    pl.col("yelping_dt").dt.truncate("1y").alias("start_of_year"),

    # End of month/year
    (pl.col("yelping_dt").dt.truncate("1mo") + pl.duration(days=31)).dt.truncate("1mo").alias("end_of_month"),
    (pl.col("yelping_dt").dt.truncate("1y") + pl.duration(days=366)).dt.truncate("1y").alias("end_of_year")
])

start_end_period.select([
    "yelping_dt", "start_of_month", "end_of_month", "start_of_year", "end_of_year"
]).collect()

yelping_dt,start_of_month,end_of_month,start_of_year,end_of_year
datetime[μs],datetime[μs],datetime[μs],datetime[μs],datetime[μs]
2007-01-25 16:47:26,2007-01-01 00:00:00,2007-02-01 00:00:00,2007-01-01 00:00:00,2008-01-01 00:00:00
2009-01-25 04:35:42,2009-01-01 00:00:00,2009-02-01 00:00:00,2009-01-01 00:00:00,2010-01-01 00:00:00
2008-07-25 10:41:00,2008-07-01 00:00:00,2008-08-01 00:00:00,2008-01-01 00:00:00,2009-01-01 00:00:00
2005-11-29 04:38:33,2005-11-01 00:00:00,2005-12-01 00:00:00,2005-01-01 00:00:00,2006-01-01 00:00:00
2007-01-05 19:40:59,2007-01-01 00:00:00,2007-02-01 00:00:00,2007-01-01 00:00:00,2008-01-01 00:00:00
…,…,…,…,…
2015-01-06 00:31:31,2015-01-01 00:00:00,2015-02-01 00:00:00,2015-01-01 00:00:00,2016-01-01 00:00:00
2016-06-14 07:20:52,2016-06-01 00:00:00,2016-07-01 00:00:00,2016-01-01 00:00:00,2017-01-01 00:00:00
2017-02-04 15:31:58,2017-02-01 00:00:00,2017-03-01 00:00:00,2017-01-01 00:00:00,2018-01-01 00:00:00
2011-01-14 00:29:08,2011-01-01 00:00:00,2011-02-01 00:00:00,2011-01-01 00:00:00,2012-01-01 00:00:00


## <strong style="color:#5e17eb"> Summary </strong>

**Goal: Break date/time into components for analysis.**

| Sub-Topic                   | Function / Method                                                 | Example                               |
| --------------------------- | ----------------------------------------------------------------- | ------------------------------------- |
| **2.1 Calendar Parts**      | `.dt.year()`, `.dt.month()`, `.dt.day()`                          | `df.select(pl.col("date").dt.year())` |
| **2.2 Time Parts**          | `.dt.hour()`, `.dt.minute()`, `.dt.second()`, `.dt.millisecond()` | `df.select(pl.col("time").dt.hour())` |
| **2.3 Week & Day Info**     | `.dt.week()`, `.dt.weekday()`, `.dt.ordinal_day()`                | `pl.col("date").dt.weekday()`         |
| **2.4 Start/End of Period** | `.dt.start_of(unit)`, `.dt.end_of(unit)`                          | `pl.col("date").dt.start_of("month")` |


# <strong style="color:#5e17eb">  3. Manipulating Dates & Times</strong>


> Offsets support d (days), h (hours), m (minutes), s (seconds), mo (months), y (years).

## <strong style="color:#5e17eb">  Add & subtract </strong>

In [None]:
add_subtract = user_lf.with_columns([
    pl.col("yelping_dt").dt.offset_by("1d").alias("plus_1_day"),
    pl.col("yelping_dt").dt.offset_by("-2h").alias("minus_2_hours")
])

add_subtract.select(["yelping_dt", "plus_1_day", "minus_2_hours"]).collect().head()


yelping_dt,plus_1_day,minus_2_hours
datetime[μs],datetime[μs],datetime[μs]
2007-01-25 16:47:26,2007-01-26 16:47:26,2007-01-25 14:47:26
2009-01-25 04:35:42,2009-01-26 04:35:42,2009-01-25 02:35:42
2008-07-25 10:41:00,2008-07-26 10:41:00,2008-07-25 08:41:00
2005-11-29 04:38:33,2005-11-30 04:38:33,2005-11-29 02:38:33
2007-01-05 19:40:59,2007-01-06 19:40:59,2007-01-05 17:40:59


## <strong style="color:#5e17eb">Rounding  </strong>

In [None]:
rounding = user_lf.with_columns([
    pl.col("yelping_dt").dt.round("1h").alias("round_to_hour"),
    pl.col("yelping_dt").dt.truncate("1d").alias("truncate_to_day"),
    pl.col("yelping_dt").dt.truncate("1w").alias("truncate_to_week")
])
rounding.select(["yelping_dt", "round_to_hour", "truncate_to_day", "truncate_to_week"]).collect().head()

yelping_dt,round_to_hour,truncate_to_day,truncate_to_week
datetime[μs],datetime[μs],datetime[μs],datetime[μs]
2007-01-25 16:47:26,2007-01-25 17:00:00,2007-01-25 00:00:00,2007-01-22 00:00:00
2009-01-25 04:35:42,2009-01-25 05:00:00,2009-01-25 00:00:00,2009-01-19 00:00:00
2008-07-25 10:41:00,2008-07-25 11:00:00,2008-07-25 00:00:00,2008-07-21 00:00:00
2005-11-29 04:38:33,2005-11-29 05:00:00,2005-11-29 00:00:00,2005-11-28 00:00:00
2007-01-05 19:40:59,2007-01-05 20:00:00,2007-01-05 00:00:00,2007-01-01 00:00:00


## <strong style="color:#5e17eb">  Replace Components </strong>

In [None]:
replace_parts = user_lf.with_columns(
    pl.col("yelping_dt").dt.replace(year=2025, month=12).alias("replaced_date")
)

replace_parts.select(["yelping_since", "replaced_date"]).collect().head()


yelping_since,replaced_date
str,datetime[μs]
"""2007-01-25 16:47:26""",2025-12-25 16:47:26
"""2009-01-25 04:35:42""",2025-12-25 04:35:42
"""2008-07-25 10:41:00""",2025-12-25 10:41:00
"""2005-11-29 04:38:33""",2025-12-29 04:38:33
"""2007-01-05 19:40:59""",2025-12-05 19:40:59


## <strong style="color:#5e17eb">Clips Ranges  </strong>

In [None]:
clip_ranges = user_lf.with_columns(
    pl.col("yelping_dt").clip(
        pl.date(2007, 1, 1), 
        pl.date(2010, 1, 1)
    ).alias("clipped_date")
)

clip_ranges.select(["yelping_since", "clipped_date"]).collect().head()


yelping_since,clipped_date
str,datetime[μs]
"""2007-01-25 16:47:26""",2007-01-25 16:47:26
"""2009-01-25 04:35:42""",2009-01-25 04:35:42
"""2008-07-25 10:41:00""",2008-07-25 10:41:00
"""2005-11-29 04:38:33""",2007-01-01 00:00:00
"""2007-01-05 19:40:59""",2007-01-05 19:40:59


## <strong style="color:#5e17eb"> Summary </strong>

**Goal: Shift, round, and adjust date/time values.**

| Sub-Topic                  | Function / Method                                      | Example                                |
| -------------------------- | ------------------------------------------------------ | -------------------------------------- |
| **3.1 Add/Subtract**       | `.dt.offset_by("1d")`, `.dt.offset_by("-2h")`          | `pl.col("date").dt.offset_by("7d")`    |
| **3.2 Rounding**           | `.dt.round("1h")`, `.dt.floor("1d")`, `.dt.ceil("1w")` | `pl.col("date").dt.round("1h")`        |
| **3.3 Replace Components** | `.dt.replace(year=2025, month=12)`                     | `pl.col("date").dt.replace(year=2025)` |
| **3.4 Clip Ranges**        | `.clip(pl.date(2024,1,1), pl.date(2025,1,1))`          | Restrict values to range               |


# <strong style="color:#5e17eb"> 4. Time Differences & Durations </strong>


## <strong style="color:#5e17eb"> total_# </strong>

In [None]:
user_lf.with_columns(
    pl.col("yelping_dt")
      .diff(n=1, null_behavior="ignore")      
      .dt.total_days()                         
      .alias("diff_in_days")
).select(['yelping_dt', 'diff_in_days']).collect()


yelping_dt,diff_in_days
datetime[μs],i64
2007-01-25 16:47:26,
2009-01-25 04:35:42,730
2008-07-25 10:41:00,-183
2005-11-29 04:38:33,-969
2007-01-05 19:40:59,402
…,…
2015-01-06 00:31:31,134
2016-06-14 07:20:52,525
2017-02-04 15:31:58,235
2011-01-14 00:29:08,-2213


## <strong style="color:#5e17eb"> diff </strong>

In [None]:
user_lf.with_columns(
    pl.col("yelping_dt") \
        .diff().dt.total_days().alias("days_between")
).select(["yelping_dt", "days_between"]).collect()

yelping_dt,days_between
datetime[μs],i64
2007-01-25 16:47:26,
2009-01-25 04:35:42,730
2008-07-25 10:41:00,-183
2005-11-29 04:38:33,-969
2007-01-05 19:40:59,402
…,…
2015-01-06 00:31:31,134
2016-06-14 07:20:52,525
2017-02-04 15:31:58,235
2011-01-14 00:29:08,-2213


## <strong style="color:#5e17eb"> Summary </strong>


**Goal: Measure time gaps and durations.**

| Sub-Topic                   | Function / Method                              | Example                           |
| --------------------------- | ---------------------------------------------- | --------------------------------- |
| **4.1 Difference in Units** | `.dt.diff()`                                   | `pl.col("date").diff().dt.days()` |
| **4.2 Duration Conversion** | `.dt.total_days()`, `.dt.total_seconds()`      | `pl.col("end") - pl.col("start")` |
| **4.3 Absolute Time Gaps**  | `(pl.col("end") - pl.col("start")).dt.hours()` | Difference in hours               |


# <strong style="color:#5e17eb"> 5. Grouping & Resampling with Dates </strong>


## <strong style="color:#5e17eb"> Regular Group_by </strong>

In [None]:
user_lf = user_lf.with_columns(
    pl.col("yelping_since").str.strptime(pl.Datetime)
)

In [None]:
user_lf.head(1).collect()

user_id,name,review_count,yelping_since,useful,funny,cool,elite,friends,fans,average_stars,compliment_hot,compliment_more,compliment_profile,compliment_cute,compliment_list,compliment_note,compliment_plain,compliment_cool,compliment_funny,compliment_writer,compliment_photos,yelping_dt
str,str,i64,datetime[μs],i64,i64,i64,str,str,i64,f64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,datetime[μs]
"""qVc8ODYU5SZjKXVBgXdI7w""","""Walker""",585,2007-01-25 16:47:26,7217,1259,5994,"""2007""","""NSCy54eWehBJyZdG2iE84w, pe42u7…",267,3.91,250,65,55,56,18,232,844,467,467,239,180,2007-01-25 16:47:26


## <strong style="color:#5e17eb"> Time based Grouping </strong>

In [None]:
business_lf.group_by("review_count") \
    .agg([
        (pl.col("stars").mean())
    ]).collect()

review_count,stars
i64,f64
2126,4.5
405,4.1
152,3.914634
551,3.75
280,3.875
…,…
917,4.0
896,4.0
625,4.125
1176,4.0


## <strong style="color:#5e17eb"> Rolling Windows </strong>

In [None]:
user_lf.sort("yelping_since") \
    .group_by_dynamic(
        index_column="yelping_since",  # Must be datetime
        every="1y",
        period="2y"
    ).agg([
        pl.col("review_count").mean(),
        pl.count().alias("review number")
    ]).collect()


yelping_since,review_count,review number
datetime[μs],f64,u32
2004-01-01 00:00:00,234.257059,1027
2005-01-01 00:00:00,180.217138,6360
2006-01-01 00:00:00,141.196503,20763
2007-01-01 00:00:00,111.428064,46437
2008-01-01 00:00:00,78.78783,96008
…,…,…
2018-01-01 00:00:00,6.28624,238223
2019-01-01 00:00:00,4.975601,152099
2020-01-01 00:00:00,3.521398,87929
2021-01-01 00:00:00,2.682691,43267


## <strong style="color:#5e17eb"> Summary </strong>

**Goal: Aggregate by date/time buckets.**

| Sub-Topic                   | Function / Method                                   | Example             |
| --------------------------- | --------------------------------------------------- | ------------------- |
| **5.1 Regular group_by**     | `.group_by("date").agg(...)`                         | Group by exact date |
| **5.2 Time-based Grouping** | `.group_by_dynamic("date", every="1mo")`             | Monthly aggregation |
| **5.3 Rolling Windows**     | `.group_by_dynamic("date", every="7d", period="7d")` | Weekly rolling sums |


# <strong style="color:#5e17eb"> 6. Working with Timezones </strong>


In [None]:
df = pl.DataFrame({
    "date_str": ["2024-01-01", "2024-06-15", "2024-12-31"]
})

# Convert to Date
df = df.with_columns(
    pl.col("date_str").str.strptime(pl.Date, "%Y-%m-%d").alias("date")
)
print(df)


shape: (3, 2)
┌────────────┬────────────┐
│ date_str   ┆ date       │
│ ---        ┆ ---        │
│ str        ┆ date       │
╞════════════╪════════════╡
│ 2024-01-01 ┆ 2024-01-01 │
│ 2024-06-15 ┆ 2024-06-15 │
│ 2024-12-31 ┆ 2024-12-31 │
└────────────┴────────────┘


## <strong style="color:#5e17eb"> Set a Time Zone </strong>

In [None]:
df = pl.DataFrame({
    "date_slash": ["01/02/2024", "15/03/2024"]
})

df = df.with_columns(
    pl.col("date_slash").str.strptime(pl.Date, "%d/%m/%Y").alias("date")
)


In [None]:
df = pl.DataFrame({
    "ts": [
        datetime.datetime(2024, 1, 1, 12, 0),
        datetime.datetime(2024, 3, 15, 18, 0)
    ]
}).with_columns(
    pl.col("ts").dt.replace_time_zone("UTC")
)

print(df)


shape: (2, 1)
┌─────────────────────────┐
│ ts                      │
│ ---                     │
│ datetime[μs, UTC]       │
╞═════════════════════════╡
│ 2024-01-01 12:00:00 UTC │
│ 2024-03-15 18:00:00 UTC │
└─────────────────────────┘


## <strong style="color:#5e17eb"> Replace a Time zone </strong>

In [None]:
df.with_columns(
    pl.col("ts").dt.convert_time_zone("Asia/Kolkata").alias("ts_IST")
)


ts,ts_IST
"datetime[μs, UTC]","datetime[μs, Asia/Kolkata]"
2024-01-01 12:00:00 UTC,2024-01-01 17:30:00 IST
2024-03-15 18:00:00 UTC,2024-03-15 23:30:00 IST


## <strong style="color:#5e17eb">  Date Range</strong>

In [None]:
# Create a date range
dr = pl.date_range(
    start=datetime.datetime(2024, 1, 1),
    end=datetime.datetime(2024, 1, 10),
    interval="3d",
    eager=True
)
dr


literal
date
2024-01-01
2024-01-04
2024-01-07
2024-01-10


In [None]:
df.schema

Schema([('ts', Datetime(time_unit='us', time_zone='UTC'))])

## <strong style="color:#5e17eb"> Cast  </strong>

In [None]:
df = df.with_columns_seq([
    pl.col("ts").cast(pl.Date).alias("only_date")
])

df = df.with_columns_seq([
    pl.col("only_date").cast(pl.Datetime).alias("back_to_datetime")
])

df

ts,only_date,back_to_datetime
"datetime[μs, UTC]",date,datetime[μs]
2024-01-01 12:00:00 UTC,2024-01-01,2024-01-01 00:00:00
2024-03-15 18:00:00 UTC,2024-03-15,2024-03-15 00:00:00


## <strong style="color:#5e17eb"> Summary </strong>

**Goal: Handle local time vs UTC.**

| Sub-Topic                | Function / Method                       | Example          |
| ------------------------ | --------------------------------------- | ---------------- |
| **6.1 Set Timezone**     | `.dt.replace_time_zone("UTC")`          | Convert to UTC   |
| **6.2 Convert Timezone** | `.dt.convert_time_zone("Asia/Kolkata")` | Change time zone |
| **6.3 Remove Timezone**  | `.dt.replace_time_zone(None)`           | Drop tz info     |


# <strong style="color:#5e17eb"> 7. Validation & Cleaning </strong>


In [None]:
user_lf.null_count().collect()

user_id,name,review_count,yelping_since,useful,funny,cool,elite,friends,fans,average_stars,compliment_hot,compliment_more,compliment_profile,compliment_cute,compliment_list,compliment_note,compliment_plain,compliment_cool,compliment_funny,compliment_writer,compliment_photos,yelping_dt
u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32
0,37,0,0,0,0,0,1896699,878551,0,0,0,0,0,0,0,0,0,0,0,0,0,0


- Rest we already discuss in our previous notebooks

## <strong style="color:#5e17eb"> Summary </strong>

**Goal: Ensure dates/times are valid and clean.**

| Sub-Topic                     | Function / Method                             | Example                                   |
| ----------------------------- | --------------------------------------------- | ----------------------------------------- |
| **7.1 Check Nulls**           | `.is_null()`, `.is_not_null()`                | `df.filter(pl.col("date").is_not_null())` |
| **7.2 Filter by Range**       | `(pl.col("date") >= pl.date(2024,1,1)) & ...` | Keep data in range                        |
| **7.3 Detect Parsing Errors** | `.str.strptime(..., strict=False)`            | Avoid failing on bad dates                |


# <strong style="color:#5e17eb"> 8. Pro Tips for Date/Time in Polars </strong>

- Prefer Native Types: Use `pl.Date` / `pl.Datetime` — they are faster and memory-efficient.

- Always Specify Format: Avoid ambiguous parsing (%Y-%m-%d is faster than guessing).

- Use Lazy API for Big Time Data: Speeds up large range filtering & grouping.

- Use `.start_of()` & `.end_of()` for Grouping: Prevents month/week edge errors.

- Time Arithmetic: .`dt.offset_by()` is safer than adding integers directly.

- Timezones: Convert all to UTC before joins/merges.

- https://docs.pola.rs/api/python/stable/reference/series/temporal.html

<div style="text-align: center;">
  <h4 style="
    display: inline-block;
    color: #5e17eb;
    font-family: 'Segoe UI';
    border-left: 5px solid #5e17eb;
    background-color: #F8F9F9;
    padding: 10px 20px;
    border-radius: 5px;
    text-align: left;
  ">
  <b>
    Thank You 💜
    </b>
  </h4>
</div>