# Original Filing Dates

In [32]:
from pathlib import Path
import json

# Load the submissions JSON for CIK 0000055785 into `data`.
# JSON text is UTF-8; pin the encoding so decoding does not depend on the
# platform's locale default.
with open(Path("data") / "CIK0000055785_submissions.json", encoding="utf-8") as f:
    data = json.load(f)

In [33]:
# Pair each recent filing's filing date with its report date.
recent_filings = data["filings"]["recent"]
dates = list(zip(recent_filings["filingDate"], recent_filings["reportDate"]))

# Number of (filing date, report date) pairs.
len(dates)

1001

In [34]:
import pandas as pd

# Build the frame with its column names in one step; naming the columns in the
# constructor also works when `dates` is empty, whereas assigning `df.columns`
# afterwards would raise a length-mismatch error in that case.
df = pd.DataFrame(dates, columns=["filing_date", "report_date"])
df.head(3)

Unnamed: 0,filing_date,report_date
0,2026-02-12,2025-12-31
1,2026-02-09,2026-02-09
2,2026-02-06,2025-03-31


In [35]:
import numpy as np

def fractional_year(stamps: pd.Series) -> pd.Series:
    """Express datetimes as fractional years: ``year + day_of_year / 366``.

    The denominator is fixed at 366 for every year (not only leap years), so
    31 December maps to ``year + 365/366`` in a common year and to
    ``year + 1.0`` in a leap year.

    Args:
        stamps: Series of datetime64 values without missing entries.

    Returns:
        Float Series of fractional years, aligned on the input's index.
    """
    return stamps.dt.year + stamps.dt.dayofyear / 366


# Start from the raw `dates` pairs rather than the current `df`, so this cell
# can be re-run safely: feeding already-converted floats into pd.to_datetime
# would reinterpret them as epoch offsets and silently corrupt the result.
date_columns = ["filing_date", "report_date"]
raw_dates = pd.DataFrame(dates, columns=date_columns)

parsed_dates = pd.DataFrame(
    {col: pd.to_datetime(raw_dates[col]) for col in date_columns}
).dropna()  # can't convert with missing dates

df = pd.DataFrame({col: fractional_year(parsed_dates[col]) for col in date_columns})

# Make sure the output directory exists before writing.
output_dir = Path("preprocessed")
output_dir.mkdir(parents=True, exist_ok=True)
df.to_csv(output_dir / "KMB_dates.csv", index=False)

In [39]:
# Display the converted frame (both columns now hold fractional years).
df

Unnamed: 0,filing_date,report_date
0,2026.117486,2025.997268
1,2026.109290,2026.109290
2,2026.101093,2025.245902
3,2026.098361,2026.098361
5,2026.090164,2026.084699
...,...,...
996,2016.765027,2016.759563
997,2016.765027,2016.759563
998,2016.765027,2016.759563
999,2016.765027,2016.759563


## Other features

In [36]:
# List the keys stored under filings -> recent, to see which other fields
# could serve as features.
data["filings"]["recent"].keys()

dict_keys(['accessionNumber', 'filingDate', 'reportDate', 'acceptanceDateTime', 'act', 'form', 'fileNumber', 'filmNumber', 'items', 'core_type', 'size', 'isXBRL', 'isInlineXBRL', 'primaryDocument', 'primaryDocDescription'])

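Ignoring data older than about 10 years: the `recent` block only reaches back to late 2016, and older filings are stored in a separate file (listed below) that this notebook does not load.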

In [37]:
# Inspect the `files` entry of the filings section (judging by the output,
# metadata about additional submission files).
data["filings"]["files"]

[{'name': 'CIK0000055785-submissions-001.json',
  'filingCount': 1910,
  'filingFrom': '1994-02-18',
  'filingTo': '2016-10-04'}]

In [38]:
# Top-level keys of the filings section.
data["filings"].keys()

dict_keys(['recent', 'files'])