# 🔍 Parsing and Splitting Fields

This notebook demonstrates how to split and parse structured data from text fields using pandas.

In [1]:
import pandas as pd
import numpy as np

## 🧑‍🤝‍🧑 Example 1: Split Full Names

In [2]:
# Demo frame holding one "First Last" string per row.
df_names = pd.DataFrame({
    'full_name': ['Alice Brown', 'John Smith', 'Linda Johnson']
})
# Split on the first space only (n=1) so the remainder of the string stays
# together in last_name. `n` must be passed by keyword: pandas 2.0+ accepts
# only `pat` positionally, and `str.split(' ', 1, ...)` raises TypeError there.
df_names[['first_name', 'last_name']] = df_names['full_name'].str.split(' ', n=1, expand=True)
df_names

Unnamed: 0,full_name,first_name,last_name
0,Alice Brown,Alice,Brown
1,John Smith,John,Smith
2,Linda Johnson,Linda,Johnson


## 📧 Example 2: Extract Username and Domain from Email

In [3]:
# Build the email frame and derive both parts in a single chained expression.
# Each pattern has exactly one capture group, so expand=False yields a Series:
#   username -> the text captured before the '@'
#   domain   -> the text captured after the '@'
df_email = pd.DataFrame(
    {'email': ['alice@gmail.com', 'bob@yahoo.com', 'charlie@uci.edu']}
).assign(
    username=lambda frame: frame['email'].str.extract(r'^(.+?)@', expand=False),
    domain=lambda frame: frame['email'].str.extract(r'@(.+)$', expand=False),
)
df_email

Unnamed: 0,email,username,domain
0,alice@gmail.com,alice,gmail.com
1,bob@yahoo.com,bob,yahoo.com
2,charlie@uci.edu,charlie,uci.edu


## 💵 Example 3: Clean and Convert Price Strings

In [4]:
# Build the price frame, then add a numeric column next to the raw strings.
# The character class [$,] removes the currency symbol and the thousands
# separators; what is left is cast to float.
df_price = pd.DataFrame(
    {'price': ['$1,200.00', '$999.99', '$2,345.67']}
).assign(
    price_clean=lambda frame: (
        frame['price']
        .str.replace('[$,]', '', regex=True)
        .astype(float)
    )
)
df_price

Unnamed: 0,price,price_clean
0,"$1,200.00",1200.0
1,$999.99,999.99
2,"$2,345.67",2345.67


## 🕒 Example 4: Parse Timestamp and Extract Date Parts

In [5]:
# Build the timestamp frame and derive the parsed columns in one pipeline.
# The second .assign runs after the first, so it can read the freshly
# created 'datetime' column.
df_time = (
    pd.DataFrame({
        'timestamp': ['2024-01-01 14:30:00', '2024-01-02 09:45:00', '2024-01-03 22:15:00']
    })
    # Parse the raw strings into datetime values.
    .assign(datetime=lambda frame: pd.to_datetime(frame['timestamp']))
    # Pull the calendar date and the hour of day out of the parsed values.
    .assign(
        date=lambda frame: frame['datetime'].dt.date,
        hour=lambda frame: frame['datetime'].dt.hour,
    )
)
df_time

Unnamed: 0,timestamp,datetime,date,hour
0,2024-01-01 14:30:00,2024-01-01 14:30:00,2024-01-01,14
1,2024-01-02 09:45:00,2024-01-02 09:45:00,2024-01-02,9
2,2024-01-03 22:15:00,2024-01-03 22:15:00,2024-01-03,22
