# Original Filing Dates
Data from [here](https://data.sec.gov/submissions/CIK0000055785.json)

In [39]:
from pathlib import Path
import json

with open(Path("data") / "CIK0000055785_submissions.json") as f:
    data = json.load(f)

In [40]:
import pandas as pd

filing_dt = pd.to_datetime(pd.Series(data["filings"]["recent"]["acceptanceDateTime"]))
filing_dt

0      2026-02-12 18:06:05+00:00
1      2026-02-09 21:23:13+00:00
2      2026-02-06 22:27:22+00:00
3      2026-02-05 22:58:10+00:00
4      2026-02-05 21:04:11+00:00
                  ...           
996    2016-10-06 20:20:31+00:00
997    2016-10-06 20:20:28+00:00
998    2016-10-06 20:20:24+00:00
999    2016-10-06 20:20:20+00:00
1000   2016-10-06 20:20:17+00:00
Length: 1001, dtype: datetime64[ns, UTC]

In [41]:
import pandas as pd
from pandas.tseries.holiday import USFederalHolidayCalendar
from pandas.tseries.offsets import CustomBusinessDay

def get_market_reception_day(utc_series):
    # 1. Setup the NYSE Business Calendar (excludes weekends & holidays)
    us_bus = CustomBusinessDay(calendar=USFederalHolidayCalendar())
    
    # 2. Localize to UTC and Convert to New York (handles EST/EDT automatically)
    ca_time = pd.to_datetime(utc_series).dt.tz_convert('America/New_York')
    
    # 3. Logic: If filed after 4:00 PM, it belongs to the NEXT business day
    # Note: SEC acceptance times are precision timestamps. 
    is_after_hours = ca_time.dt.hour >= 16
    
    # 4. Create the final date. 
    # Use .normalize() to strip the time and keep just the date
    reception_date = ca_time.dt.normalize()
    
    # Shift only the after-hours filings to the next business day
    reception_date.loc[is_after_hours] += us_bus
    
    # 5. Final Check: Ensure even "on-time" filings aren't on weekends/holidays
    # This rolls a Saturday 10:00 AM filing forward to Monday
    is_weekend_or_holiday = ~reception_date.isin(pd.bdate_range(reception_date.min(), reception_date.max(), freq=us_bus))
    reception_date.loc[is_weekend_or_holiday] += us_bus
    
    return reception_date.dt.tz_localize(None) # Strip TZ for cleaner dataframes

filing_dt = get_market_reception_day(filing_dt)
filing_dt.head(3)

  reception_date.loc[is_after_hours] += us_bus
  reception_date.loc[is_after_hours] += us_bus
  reception_date.loc[is_weekend_or_holiday] += us_bus
  reception_date.loc[is_weekend_or_holiday] += us_bus


0   2026-02-12
1   2026-02-10
2   2026-02-09
dtype: datetime64[ns]

In [42]:
import pandas as pd

# Filing date in terms of next market day
dates = list(zip(filing_dt.to_numpy(), data["filings"]["recent"]["reportDate"]))

df = pd.DataFrame(dates)
df.columns = ["filing_date", "report_date"]
df.head(3)

Unnamed: 0,filing_date,report_date
0,2026-02-12,2025-12-31
1,2026-02-10,2026-02-09
2,2026-02-09,2025-03-31


In [43]:
import numpy as np

df["filing_date"] = pd.to_datetime(df["filing_date"])
df["report_date"] = pd.to_datetime(df["report_date"])

df = df.dropna() # can't convert with missing dates

df

Unnamed: 0,filing_date,report_date
0,2026-02-12,2025-12-31
1,2026-02-10,2026-02-09
2,2026-02-09,2025-03-31
3,2026-02-06,2026-02-05
5,2026-02-02,2026-01-31
...,...,...
996,2016-10-07,2016-10-04
997,2016-10-07,2016-10-04
998,2016-10-07,2016-10-04
999,2016-10-07,2016-10-04


In [44]:
df["filing_date"] = df["filing_date"].dt.year + df["filing_date"].apply(lambda x: x.timetuple().tm_yday) / 366 # leap year
df["report_date"] = df["report_date"].dt.year + df["report_date"].apply(lambda x: x.timetuple().tm_yday) / 366 # leap year

df.to_csv(Path("preprocessed") / "KMB_dates.csv", index=False)

## Other features

In [45]:
data["filings"]["recent"].keys()

dict_keys(['accessionNumber', 'filingDate', 'reportDate', 'acceptanceDateTime', 'act', 'form', 'fileNumber', 'filmNumber', 'items', 'core_type', 'size', 'isXBRL', 'isInlineXBRL', 'primaryDocument', 'primaryDocDescription'])

Ignoring data older than 10 years

In [46]:
data["filings"]["files"]

[{'name': 'CIK0000055785-submissions-001.json',
  'filingCount': 1910,
  'filingFrom': '1994-02-18',
  'filingTo': '2016-10-04'}]

In [47]:
data["filings"].keys()

dict_keys(['recent', 'files'])