# Day 3: Data Wrangling I - Date/Time and Data Transformation

This notebook covers handling date/time data and data transformation techniques.

## 1. Setting up the Environment

In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
import pytz

## 2. Working with Datetime

In [5]:
# Creating datetime objects
current_date = datetime.now()
print("Current date and time:", current_date)

# Creating datetime from string
date_string = "2023-09-15 14:30:00"
parsed_date = datetime.strptime(date_string, "%Y-%m-%d %H:%M:%S")
print("\nParsed date:", parsed_date)

# Formatting dates
formatted_date = current_date.strftime("%B %d, %Y at %I:%M %p")
print("\nFormatted date:", formatted_date)

Current date and time: 2025-09-17 14:18:31.339980

Parsed date: 2023-09-15 14:30:00

Formatted date: September 17, 2025 at 02:18 PM


## 3. Working with Time Zones

In [4]:
# Working with different time zones
utc_now = datetime.now(pytz.UTC)
print("UTC time:", utc_now)

# Convert to different time zones
time_zones = ['US/Pacific', 'US/Eastern', 'Asia/Tokyo', 'Europe/London','Asia/Kolkata']

for tz in time_zones:
    local_tz = pytz.timezone(tz)
    local_time = utc_now.astimezone(local_tz)
    print("time: ", local_time)
    print(f"\nTime in {tz}:", local_time.strftime("%Y-%m-%d %H:%M:%S %Z"))

UTC time: 2025-09-17 08:44:52.756561+00:00
time:  2025-09-17 01:44:52.756561-07:00

Time in US/Pacific: 2025-09-17 01:44:52 PDT
time:  2025-09-17 04:44:52.756561-04:00

Time in US/Eastern: 2025-09-17 04:44:52 EDT
time:  2025-09-17 17:44:52.756561+09:00

Time in Asia/Tokyo: 2025-09-17 17:44:52 JST
time:  2025-09-17 09:44:52.756561+01:00

Time in Europe/London: 2025-09-17 09:44:52 BST
time:  2025-09-17 14:14:52.756561+05:30

Time in Asia/Kolkata: 2025-09-17 14:14:52 IST


## 4. Date Operations in Pandas

In [None]:
# Create sample DataFrame with dates
dates = pd.date_range(start='2023-01-01', end='2023-12-31', freq='D')
df = pd.DataFrame({'date': dates})

# Extract components
df['year'] = df['date'].dt.year
df['month'] = df['date'].dt.month
df['day'] = df['date'].dt.day
df['day_of_week'] = df['date'].dt.day_name()
df['quarter'] = df['date'].dt.quarter

print("Sample of date components:")
print(df.head())

## 5. Data Binning

In [5]:
# Create sample age data
np.random.seed(42)
ages = np.random.randint(0, 100, 1000)
df_ages = pd.DataFrame({'age': ages})

# Create age bins
bins = [0, 12, 19, 35, 50, 65, 100]
labels = ['Child', 'Teenager', 'Young Adult', 'Adult', 'Middle Aged', 'Senior']

df_ages['age_group'] = pd.cut(df_ages['age'], bins=bins, labels=labels)

# Display distribution
print("Age group distribution:")
print(df_ages['age_group'].value_counts().sort_index())

Age group distribution:
age_group
Child          119
Teenager        78
Young Adult    165
Adult          132
Middle Aged    165
Senior         325
Name: count, dtype: int64


## 6. Data Transformation

In [None]:
# Create sample data
data = {
    'income': np.random.lognormal(10, 1, 1000),
    'score': np.random.normal(70, 15, 1000)
}
df_transform = pd.DataFrame(data)

# Different transformation techniques
# 1. Log transformation
df_transform['income_log'] = np.log(df_transform['income'])

# 2. Z-score normalization
df_transform['score_zscore'] = (df_transform['score'] - df_transform['score'].mean()) / df_transform['score'].std()

# 3. Min-Max scaling
df_transform['score_minmax'] = (df_transform['score'] - df_transform['score'].min()) / (df_transform['score'].max() - df_transform['score'].min())

# Display summary statistics
print("Summary statistics of transformed data:")
print(df_transform.describe())

## 7. Practical Exercise: Time Series Analysis