## Section 1: Full Extraction

In [None]:
import pandas as pd

# Full extraction: read the whole patient workbook into a single DataFrame
df = pd.read_excel(io="Patient data.xlsx")
print(f"Extracted {len(df)} rows fully.")
# Preview the first five rows as the cell output
df.head(5)

## Section 2: Incremental Extraction

In [None]:
from datetime import datetime

# Load last extraction timestamp (the watermark written by a previous run).
# On a first run the file does not exist yet, and an empty file parses to NaT;
# in both cases there is no usable watermark.
try:
    with open("last_extraction.txt") as f:
        last_extraction = pd.to_datetime(f.read().strip())
except FileNotFoundError:
    last_extraction = pd.NaT

# Ensure visit_date is datetime (unparseable values become NaT)
df["visit_date"] = pd.to_datetime(df["visit_date"], errors='coerce')

# Extract only new records
if pd.isna(last_extraction):
    # No watermark: treat every row with a valid visit_date as new, instead of
    # crashing (missing file) or silently selecting nothing (comparison with NaT).
    is_new = df["visit_date"].notna()
else:
    # Rows whose visit_date is NaT compare False and are therefore excluded.
    is_new = df["visit_date"] > last_extraction

# Explicit copy so later changes to incremental_df are independent of df
incremental_df = df[is_new].copy()
print(f"Extracted {len(incremental_df)} rows incrementally since last check.")
incremental_df.head()

## Section 3: Save New Timestamp

In [None]:
# Persist the current local wall-clock time, in ISO 8601 form, as the marker
# for the next incremental extraction; mode "w" replaces any earlier value.
# NOTE(review): this records when the notebook ran, not the latest visit_date
# that was processed - confirm that is the intended watermark.
with open("last_extraction.txt", mode="w") as f:
    print(datetime.now().isoformat(), end="", file=f)

## Section 4: Transform Full Data

In [None]:
# Cleaning
# Ages are coerced to numbers BEFORE rows without an age are dropped: values
# that cannot be parsed become NaN during coercion, so dropping afterwards
# removes them together with the originally missing ages. (Dropping first
# would let unparseable ages slip through as NaN.)
df = (
    df.drop_duplicates()
    .assign(age=lambda frame: pd.to_numeric(frame["age"], errors="coerce"))
    .dropna(subset=["age"])
)

# Enrichment
# Flag patients aged 65 or older
df = df.assign(is_senior=df["age"] >= 65)

# Categorization
def categorize_age(age):
    """Return the age-bracket label for a single age value.

    Parameters
    ----------
    age : scalar
        Age to classify; may be missing (None / NaN).

    Returns
    -------
    str
        "unknown" when the age is missing, otherwise "child" (below 13),
        "teen" (below 20), "adult" (below 65) or "senior" (65 and above).
    """
    if pd.isna(age):
        return "unknown"
    # Upper bounds are exclusive and checked in ascending order; first match wins.
    for upper_bound, label in ((13, "child"), (20, "teen"), (65, "adult")):
        if age < upper_bound:
            return label
    return "senior"
# Label every row with its age bracket
age_labels = df["age"].apply(categorize_age)
df["age_group"] = age_labels

# Write the full transformed table to CSV (index column omitted), then preview it
df.to_csv("transformed_full.csv", index=False)
df.head(5)

## Section 5: Transform Incremental Data

In [None]:
# Clean the incremental slice.
# Ages are coerced to numbers BEFORE rows without an age are dropped: values
# that cannot be parsed become NaN during coercion, so dropping afterwards
# removes them together with the originally missing ages. (Dropping first
# would let unparseable ages slip through as NaN.)
incremental_df = (
    incremental_df.drop_duplicates()
    .assign(age=lambda frame: pd.to_numeric(frame["age"], errors="coerce"))
    .dropna(subset=["age"])
)

# Enrich: flag patients aged 65 or older and attach the age-bracket label
incremental_df = incremental_df.assign(
    is_senior=incremental_df["age"] >= 65,
    age_group=incremental_df["age"].apply(categorize_age),
)

# Save incremental transformed
incremental_df.to_csv("transformed_incremental.csv", index=False)
incremental_df.head()