In [2]:
import pandas as pd

# Load raw data again (keep notebooks independent and reproducible)
df = pd.read_csv("../data/philadelphia_911.csv")

# Parse timestamps safely (same logic as before)
df["timeStamp"] = pd.to_datetime(
    df["timeStamp"],
    format="mixed",
    dayfirst=True
)

df = df.set_index("timeStamp")

# Create daily call volume
daily_calls = df.resample("D").size().asfreq("D", fill_value=0)

daily_calls.head()

timeStamp
2015-12-10    114
2015-12-11    391
2015-12-12    402
2015-12-13    316
2015-12-14    444
Freq: D, dtype: int64

In [3]:
# Convert to DataFrame
ts_df = daily_calls.to_frame(name="call_volume")

ts_df.head()

Unnamed: 0_level_0,call_volume
timeStamp,Unnamed: 1_level_1
2015-12-10,114
2015-12-11,391
2015-12-12,402
2015-12-13,316
2015-12-14,444


In [4]:
# Create lag features
ts_df["lag_1"] = ts_df["call_volume"].shift(1)
ts_df["lag_7"] = ts_df["call_volume"].shift(7)
ts_df["lag_14"] = ts_df["call_volume"].shift(14)

ts_df.head(10)

Unnamed: 0_level_0,call_volume,lag_1,lag_7,lag_14
timeStamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2015-12-10,114,,,
2015-12-11,391,114.0,,
2015-12-12,402,391.0,,
2015-12-13,316,402.0,,
2015-12-14,444,316.0,,
2015-12-15,419,444.0,,
2015-12-16,376,419.0,,
2015-12-17,387,376.0,114.0,
2015-12-18,346,387.0,391.0,
2015-12-19,272,346.0,402.0,


In [5]:
# Rolling average features
ts_df["roll_7"] = ts_df["call_volume"].rolling(window=7).mean()
ts_df["roll_14"] = ts_df["call_volume"].rolling(window=14).mean()

ts_df.head(15)

Unnamed: 0_level_0,call_volume,lag_1,lag_7,lag_14,roll_7,roll_14
timeStamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2015-12-10,114,,,,,
2015-12-11,391,114.0,,,,
2015-12-12,402,391.0,,,,
2015-12-13,316,402.0,,,,
2015-12-14,444,316.0,,,,
2015-12-15,419,444.0,,,,
2015-12-16,376,419.0,,,351.714286,
2015-12-17,387,376.0,114.0,,390.714286,
2015-12-18,346,387.0,391.0,,384.285714,
2015-12-19,272,346.0,402.0,,365.714286,


In [6]:
# Calendar-based features
ts_df["day_of_week"] = ts_df.index.dayofweek
ts_df["is_weekend"] = ts_df["day_of_week"].isin([5, 6]).astype(int)

ts_df.head(15)

Unnamed: 0_level_0,call_volume,lag_1,lag_7,lag_14,roll_7,roll_14,day_of_week,is_weekend
timeStamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2015-12-10,114,,,,,,3,0
2015-12-11,391,114.0,,,,,4,0
2015-12-12,402,391.0,,,,,5,1
2015-12-13,316,402.0,,,,,6,1
2015-12-14,444,316.0,,,,,0,0
2015-12-15,419,444.0,,,,,1,0
2015-12-16,376,419.0,,,351.714286,,2,0
2015-12-17,387,376.0,114.0,,390.714286,,3,0
2015-12-18,346,387.0,391.0,,384.285714,,4,0
2015-12-19,272,346.0,402.0,,365.714286,,5,1


In [7]:
# Drop rows with missing values created by lag/rolling features
model_df = ts_df.dropna()

model_df.head()

Unnamed: 0_level_0,call_volume,lag_1,lag_7,lag_14,roll_7,roll_14,day_of_week,is_weekend
timeStamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2015-12-24,447,519.0,387.0,114.0,390.0,390.357143,3,0
2015-12-25,321,447.0,346.0,391.0,386.428571,385.357143,4,0
2015-12-26,286,321.0,272.0,402.0,388.428571,377.071429,5,1
2015-12-27,317,286.0,271.0,316.0,395.0,377.142857,6,1
2015-12-28,380,317.0,427.0,444.0,388.285714,372.571429,0,0


In [8]:
model_df.isna().sum()


call_volume    0
lag_1          0
lag_7          0
lag_14         0
roll_7         0
roll_14        0
day_of_week    0
is_weekend     0
dtype: int64

### Output
This notebook produces the final feature-engineered dataset used for forecasting models.