# DateTime data processing

In [1]:
import numpy as np
import pandas as pd
from pytz import all_timezones

In [2]:
# Raw date strings: day-month-year followed by a 12-hour clock time.
date_strings = np.array([
    "03-04-2005 11:35 PM",
    "23-05-2010 12:01 PM",
    "04-09-2009 09:09 PM",
])

# Parse every string with an explicit format so day and month are never
# swapped. Passing errors="coerce" to pd.to_datetime would turn unparsable
# values into NaT instead of raising.
list(map(lambda raw: pd.to_datetime(raw, format="%d-%m-%Y %I:%M %p"), date_strings))

[Timestamp('2005-04-03 23:35:00'),
 Timestamp('2010-05-23 12:01:00'),
 Timestamp('2009-09-04 21:09:00')]

+ **%Y** - full (four-digit) year
+ **%m** - zero-padded month
+ **%d** - zero-padded day of the month
+ **%I** - zero-padded hour on a 12-hour clock
+ **%p** - AM or PM
+ **%M** - zero-padded minutes
+ **%S** - zero-padded seconds

In [3]:
# Create a timezone-aware pd.Timestamp directly by passing `tz` to the constructor.
pd.Timestamp("2017-05-01 06:00:00", tz="Europe/London")

Timestamp('2017-05-01 06:00:00+0100', tz='Europe/London')

In [4]:
# Create a naive Timestamp (no time zone attached) and display it.
date = pd.Timestamp("2017-05-01 06:00:00")
date

Timestamp('2017-05-01 06:00:00')

In [5]:
# Attach a time zone to the naive `date` with the tz_localize method;
# the wall-clock time (06:00) is kept and only the zone/offset is added.
date_in_london = date.tz_localize("Europe/London")
date_in_london

Timestamp('2017-05-01 06:00:00+0100', tz='Europe/London')

In [6]:
# Convert the London-aware timestamp to another time zone: the instant stays
# the same, the wall-clock time shifts to the target zone's offset.
date_in_london.tz_convert("Africa/Abidjan")

Timestamp('2017-05-01 05:00:00+0000', tz='Africa/Abidjan')

In [7]:
# Three consecutive month-end dates starting from 2002-02-02.
# An explicit offset object replaces the "M" string alias: that alias is
# deprecated since pandas 2.2 (renamed to "ME") and emits a FutureWarning,
# whereas pd.offsets.MonthEnd() behaves the same on old and new versions.
dates = pd.Series(pd.date_range("2/2/2002", periods=3, freq=pd.offsets.MonthEnd()))

# For a Series, the time zone methods are reached through the .dt accessor.
dates.dt.tz_localize("Africa/Abidjan")

0   2002-02-28 00:00:00+00:00
1   2002-03-31 00:00:00+00:00
2   2002-04-30 00:00:00+00:00
dtype: datetime64[ns, Africa/Abidjan]

In [8]:
# Show the first three entries of pytz's list of time zone names.
all_timezones[:3]

['Africa/Abidjan', 'Africa/Accra', 'Africa/Addis_Ababa']

In [9]:
# Frame with 100,000 consecutive hourly timestamps starting at 2001-01-01.
# An explicit offset object replaces the "H" string alias: that alias is
# deprecated since pandas 2.2 (renamed to "h") and emits a FutureWarning,
# whereas pd.offsets.Hour() behaves the same on old and new versions.
dataframe = pd.DataFrame()
dataframe["date"] = pd.date_range("1/1/2001", periods=100_000, freq=pd.offsets.Hour())

# Select rows with a boolean mask: strictly after 01:00 and up to and
# including 04:00 on 2002-01-01 (the strings are compared as datetimes).
dataframe[(dataframe["date"] > "2002-1-1 01:00:00") &
          (dataframe["date"] <= "2002-1-1 04:00:00")]

Unnamed: 0,date
8762,2002-01-01 02:00:00
8763,2002-01-01 03:00:00
8764,2002-01-01 04:00:00


In [10]:
# Use the "date" column as the index while keeping it as a regular column too.
dataframe = dataframe.set_index("date", drop=False)

# With a datetime index, .loc accepts a label slice of date strings;
# unlike positional slicing, both endpoints are included.
dataframe.loc["2002-1-1 01:00:00":"2002-1-1 04:00:00"]

Unnamed: 0_level_0,date
date,Unnamed: 1_level_1
2002-01-01 01:00:00,2002-01-01 01:00:00
2002-01-01 02:00:00,2002-01-01 02:00:00
2002-01-01 03:00:00,2002-01-01 03:00:00
2002-01-01 04:00:00,2002-01-01 04:00:00


In [11]:
# Fresh frame holding 150 weekly timestamps generated from 2001-01-01 onwards.
dataframe = pd.DataFrame(
    {"date": pd.date_range("1/1/2001", periods=150, freq="W")}
)

In [12]:
# Split the datetime column into one numeric feature column per component.
# Each name is both the .dt accessor attribute and the new column label.
for part in ("year", "month", "day", "hour", "minute"):
    dataframe[part] = getattr(dataframe["date"].dt, part)

# Preview the first five rows with the derived feature columns.
dataframe.head(5)

Unnamed: 0,date,year,month,day,hour,minute
0,2001-01-07,2001,1,7,0,0
1,2001-01-14,2001,1,14,0,0
2,2001-01-21,2001,1,21,0,0
3,2001-01-28,2001,1,28,0,0
4,2001-02-04,2001,2,4,0,0


In [13]:
# Two stays, each described by a check-in and a check-out timestamp.
dataframe = pd.DataFrame({
    "Check-In": [pd.Timestamp("01-01-2017"), pd.Timestamp("01-04-2017")],
    "Check-Out": [pd.Timestamp("01-01-2017"), pd.Timestamp("01-06-2017")],
})

# Subtracting two datetime columns yields a timedelta Series (stay duration).
dataframe["Check-Out"] - dataframe["Check-In"]

0   0 days
1   2 days
dtype: timedelta64[ns]

In [14]:
# Length of each stay as a whole number of days. The vectorised .dt.days
# accessor reads the day component of every timedelta at once, instead of
# looping over Timedelta objects in Python and rebuilding a Series.
(dataframe["Check-Out"] - dataframe["Check-In"]).dt.days

0    0
1    2
dtype: int64