## Parsing Date and Time Data

In [1]:
import pandas as pd
import numpy as np

# Sample frame: one column of dates written as plain YYYY-MM-DD strings
data = dict(date_column=['2023-10-01', '2024-01-15', '2024-05-10'])
df = pd.DataFrame.from_dict(data)

In [2]:
df.head()

Unnamed: 0,date_column
0,2023-10-01
1,2024-01-15
2,2024-05-10


In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 1 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   date_column  3 non-null      object
dtypes: object(1)
memory usage: 152.0+ bytes


In [4]:
# Convert the date strings to datetime64 values.
# The format is spelled out so parsing is deterministic: pandas does not have
# to infer it, and a value that is not YYYY-MM-DD raises instead of being
# guessed at.
df['date_column'] = pd.to_datetime(df['date_column'], format='%Y-%m-%d')

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 1 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   date_column  3 non-null      datetime64[ns]
dtypes: datetime64[ns](1)
memory usage: 152.0 bytes


#### Extracting Date Components

In [8]:
# Derive calendar features from the parsed dates through a single .dt accessor
date_parts = df['date_column'].dt

df['year'] = date_parts.year
df['month'] = date_parts.month
df['day'] = date_parts.day
df['date_month_name'] = date_parts.month_name()
# Numeric weekday: Monday is 0 and Sunday is 6
df['date_dow'] = date_parts.dayofweek

# Weekday name, plus a 1/0 weekend flag (Saturday = 5, Sunday = 6)
df['date_dow_name'] = date_parts.day_name()
df['date_is_weekend'] = np.where(df['date_dow'] >= 5, 1, 0)

In [9]:
df.head()

Unnamed: 0,date_column,year,month,day,date_month_name,date_dow,date_dow_name,date_is_weekend
0,2023-10-01,2023,10,1,October,6,Sunday,1
1,2024-01-15,2024,1,15,January,0,Monday,0
2,2024-05-10,2024,5,10,May,4,Friday,0


In [11]:
# ISO week number, picked out of the frame returned by isocalendar()
df['date_week'] = df['date_column'].dt.isocalendar()['week']

In [12]:
# Calendar quarter of each date
df['quarter'] = df['date_column'].dt.quarter

In [13]:
# Semester 1 covers quarters 1 and 2; every other row is assigned semester 2
df['semester'] = np.where(df['quarter'] <= 2, 1, 2)

#### Handling Time Data

In [14]:
# Sample frame: timestamps with a time-of-day part, still stored as plain strings
data = dict(
    datetime_column=['2023-10-01 14:30:00', '2024-01-15 09:45:15', '2024-05-10 18:20:45'],
)
df = pd.DataFrame.from_dict(data)

In [15]:
df.head()

Unnamed: 0,datetime_column
0,2023-10-01 14:30:00
1,2024-01-15 09:45:15
2,2024-05-10 18:20:45


In [16]:
# Parse the timestamp strings into datetime64 values. The explicit format keeps
# parsing deterministic and raises on any value that is not
# "YYYY-MM-DD HH:MM:SS" instead of letting pandas guess.
df['datetime_column'] = pd.to_datetime(df['datetime_column'], format='%Y-%m-%d %H:%M:%S')

In [17]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 1 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   datetime_column  3 non-null      datetime64[ns]
dtypes: datetime64[ns](1)
memory usage: 152.0 bytes


**Extracting Time Components**

In [18]:
# Copy each time-of-day field from the .dt accessor into a column of the same name
for part in ('hour', 'minute', 'second'):
    df[part] = getattr(df['datetime_column'].dt, part)

df.head()

In [25]:
# Keep only the time-of-day portion of each timestamp (the date part is dropped)
df['time'] = df['datetime_column'].dt.time

In [26]:
df.head()

Unnamed: 0,datetime_column,hour,minute,second,time
0,2023-10-01 14:30:00,14,30,0,14:30:00
1,2024-01-15 09:45:15,9,45,15,09:45:15
2,2024-05-10 18:20:45,18,20,45,18:20:45


#### Date Arithmetic

In [27]:
from pandas.tseries.offsets import DateOffset

In [30]:
# Offsets applied to every timestamp in the column
one_week = DateOffset(days=7)
two_hours = DateOffset(hours=2)

# Shift each timestamp forward by a week
df['plus_7_days'] = df['datetime_column'] + one_week

# Shift each timestamp back by two hours
df['minus_2_hours'] = df['datetime_column'] - two_hours

In [31]:
df.head()

Unnamed: 0,datetime_column,hour,minute,second,time,plus_7_days,minus_2_hours
0,2023-10-01 14:30:00,14,30,0,14:30:00,2023-10-08 14:30:00,2023-10-01 12:30:00
1,2024-01-15 09:45:15,9,45,15,09:45:15,2024-01-22 09:45:15,2024-01-15 07:45:15
2,2024-05-10 18:20:45,18,20,45,18:20:45,2024-05-17 18:20:45,2024-05-10 16:20:45


**Calculating Date Differences**

In [32]:
# Elapsed time between each timestamp and a fixed reference date (midnight, 2023-10-01)
reference_date = pd.Timestamp('2023-10-01')
df['date_diff'] = df['datetime_column'] - reference_date

In [33]:
df.head()

Unnamed: 0,datetime_column,hour,minute,second,time,plus_7_days,minus_2_hours,date_diff
0,2023-10-01 14:30:00,14,30,0,14:30:00,2023-10-08 14:30:00,2023-10-01 12:30:00,0 days 14:30:00
1,2024-01-15 09:45:15,9,45,15,09:45:15,2024-01-22 09:45:15,2024-01-15 07:45:15,106 days 09:45:15
2,2024-05-10 18:20:45,18,20,45,18:20:45,2024-05-17 18:20:45,2024-05-10 16:20:45,222 days 18:20:45


**Resampling and Grouping by Time**

In [34]:
# Build 100 consecutive daily observations starting on 2023-01-01
time_series_data = pd.date_range('2023-01-01', freq='D', periods=100)
df_time_series = pd.DataFrame({'date': time_series_data, 'value': range(100)})

# Sum the daily values into weekly bins, using the dates as the index
df_weekly = df_time_series.set_index('date').resample('W').sum()

In [35]:
df_weekly.head()

Unnamed: 0_level_0,value
date,Unnamed: 1_level_1
2023-01-01,0
2023-01-08,28
2023-01-15,77
2023-01-22,126
2023-01-29,175


**Handling Time Zones**

In [38]:
# Attach a time zone to the naive timestamps by declaring them to be UTC
df['datetime_column_utc'] = df['datetime_column'].dt.tz_localize('UTC')

# Convert to US Pacific time. 'America/Los_Angeles' is the canonical IANA name;
# the legacy 'US/Pacific' alias is not available on systems that ship without
# the backward-compatibility tz links. The column keeps its "_pst" name for
# compatibility, but the values follow daylight saving time, so the offset is
# -07:00 or -08:00 depending on the date.
df['datetime_column_pst'] = df['datetime_column_utc'].dt.tz_convert('America/Los_Angeles')

In [39]:
df.head()

Unnamed: 0,datetime_column,hour,minute,second,time,plus_7_days,minus_2_hours,date_diff,datetime_column_utc,datetime_column_pst
0,2023-10-01 14:30:00,14,30,0,14:30:00,2023-10-08 14:30:00,2023-10-01 12:30:00,0 days 14:30:00,2023-10-01 14:30:00+00:00,2023-10-01 07:30:00-07:00
1,2024-01-15 09:45:15,9,45,15,09:45:15,2024-01-22 09:45:15,2024-01-15 07:45:15,106 days 09:45:15,2024-01-15 09:45:15+00:00,2024-01-15 01:45:15-08:00
2,2024-05-10 18:20:45,18,20,45,18:20:45,2024-05-17 18:20:45,2024-05-10 16:20:45,222 days 18:20:45,2024-05-10 18:20:45+00:00,2024-05-10 11:20:45-07:00
