In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Load the nationwide encounters extract.
df = pd.read_csv('./data/nationwide-encounters-fy21-fy24-dec-aor.csv')

# Keep only the part of the fiscal-year label before the first space.
df['Fiscal Year'] = df['Fiscal Year'].str.replace(' .*', '', regex=True)


def _fiscal_label_to_timestamp(row):
    """Parse one row's '<Fiscal Year>-<Month (abbv)>' label into a timestamp."""
    return pd.to_datetime(f'{row["Fiscal Year"]}-{row["Month (abbv)"]}')


# NOTE: the year in 'date' is the fiscal-year label, not the calendar year.
df['date'] = df.apply(_fiscal_label_to_timestamp, axis=1)
df = df.set_index('date')
print(df.columns)

# Total encounters for each distinct 'date' value.
encounters_per_month = df['Encounter Count'].groupby(level='date').sum()
encounters_per_month

In [None]:
# Print the first rows of df as plain text, then draw the per-date encounter
# totals held in encounters_per_month.
print(df.head())
encounters_per_month.plot()

In [None]:
def fisc_to_cal(dtm, end_month):
    """Convert a fiscal-year-labelled month into the matching calendar month.

    Months after ``end_month`` belong to the fiscal year that is named after
    the *following* calendar year, so their year is moved back by one; months
    up to and including ``end_month`` keep their year.

    Parameters
    ----------
    dtm : object with integer ``year`` and ``month`` attributes
        (e.g. ``pd.Timestamp``); ``year`` is the fiscal-year label.
    end_month : int
        Last month (1-12) of the fiscal year.

    Returns
    -------
    pd.Timestamp
        First day of the corresponding calendar month.
    """
    yr = dtm.year if dtm.month <= end_month else dtm.year - 1
    # Build the timestamp directly instead of formatting and re-parsing a string.
    return pd.Timestamp(year=yr, month=dtm.month, day=1)
  
# Last month of the fiscal year handed to fisc_to_cal (9 = September).
FISCAL_YEAR_END_MONTH = 9

# Convert only while the index still holds the fiscal-labelled 'date' values.
# Without this guard, re-running the cell would feed the already-converted
# calendar dates through fisc_to_cal again and silently move every month
# after the fiscal year end back by one more year.
if df.index.name == 'date':
    df['caldate'] = df.index.map(lambda x: fisc_to_cal(x, FISCAL_YEAR_END_MONTH))
    df = df.set_index('caldate')

# Recompute and plot the per-month totals on the calendar-date index.
encounters_per_month = df.loc[:, 'Encounter Count'].groupby(df.index).sum()
encounters_per_month.plot()

In [None]:
# Encounter totals per calendar date, pivoted so each encounter type is a
# column; date/type combinations with no rows are filled with 0.
encounter_count_df = (
    df.reset_index()
      .groupby(['caldate', 'Encounter Type'])['Encounter Count']
      .sum()
      .unstack(level='Encounter Type', fill_value=0)
)
encounter_count_df

In [None]:
# Plot every column of encounter_count_df as its own series.
encounter_count_df.plot()

In [None]:
# Yearly encounter totals per citizenship: one row per calendar year end,
# one column per citizenship.
by_ctz_df = df.reset_index()
print(by_ctz_df.columns)
by_ctz_df = (
    by_ctz_df.loc[:, ['caldate', 'Citizenship', 'Encounter Count']]
      .groupby(['caldate', 'Citizenship']).sum()
      # fill_value=0 keeps the counts as integers and matches the other
      # pivots in this notebook; missing combinations are zero encounters.
      .unstack(level=1, fill_value=0)
      # An offset object instead of the '1y' string alias: the single-letter
      # year aliases are deprecated in recent pandas, while YearEnd() means
      # the same year-end bins on every version.
      .resample(pd.offsets.YearEnd()).sum()
)
by_ctz_df.columns = by_ctz_df.columns.droplevel(0)
# Printed in two slices of ten columns so the text output stays readable.
print(by_ctz_df.iloc[:, :10].to_string())
print(by_ctz_df.iloc[:, 10:20].to_string())

In [None]:
# Row totals (all citizenships per year). DataFrame.sum is used directly
# rather than apply(sum, axis=1), which only works because pandas currently
# substitutes its own sum for the Python builtin (and warns about it).
totals = by_ctz_df.sum(axis=1)

# Turn each year's counts into fractions of that year's total.
by_ctz_df = by_ctz_df.div(totals, axis=0)
by_ctz_df.T

In [None]:
# Relative change of every column between consecutive rows of by_ctz_df,
# transposed for display so the original columns become rows.
by_ctz_df.pct_change().T

In [None]:
# Rank the row-to-row changes for a single period, largest first.
# NOTE(review): iloc[:, 3] picks the fourth row of by_ctz_df by position;
# which calendar year that is depends on the first year in the data - confirm.
by_ctz_df.pct_change().T.iloc[:, 3].sort_values(ascending=False)

In [None]:
# Yearly encounter totals per citizenship, restricted to rows whose
# Demographic is 'UC / Single Minors'.
unaccomp_df = (
    df[df['Demographic'] == 'UC / Single Minors']
      .reset_index()
      .loc[:, ['caldate', 'Citizenship', 'Encounter Count']]
      .groupby(['caldate', 'Citizenship']).sum()
      .unstack(level=1, fill_value=0)
      # Offset object instead of the deprecated '1y' string alias; it means
      # the same year-end bins on every pandas version.
      .resample(pd.offsets.YearEnd()).sum()
)
unaccomp_df.columns = unaccomp_df.columns.droplevel(0)
# Transposed for display: citizenships as rows, years as columns.
unaccomp_df.T

In [None]:
# Pick the yearly row by its calendar year instead of by position, so the
# result really is 2023 even if the data's first year changes. Raises
# IndexError if unaccomp_df has no 2023 row.
unaccomp_2023 = (
    unaccomp_df[unaccomp_df.index.year == 2023].T.iloc[:, 0]
      .sort_values(ascending=False)
)
unaccomp_2023

In [None]:
# Total of the selected year's counts across all citizenships.
unaccomp_2023.sum()

In [None]:
# Number of data rows (not summed Encounter Count) per citizenship in each
# calendar year. The offset object replaces the deprecated '1Y' string alias
# and means the same year-end bins on every pandas version.
print(df.resample(pd.offsets.YearEnd())['Citizenship'].value_counts().to_string())

In [None]:
# Distinct values of the Demographic column; each is shown at the index
# label of its first occurrence.
df['Demographic'].drop_duplicates()

In [None]:
# Rows whose Demographic is "UC / Single Minors", with the date index moved
# back into a regular 'caldate' column.
uc_minor_rows = df.query('Demographic == "UC / Single Minors"').reset_index()

# Encounter totals for every (date, citizenship, AOR) combination.
unacc_keys = ['caldate', 'Citizenship', 'AOR (Abbv)']
unacc_df = uc_minor_rows.groupby(unacc_keys)['Encounter Count'].sum()
unacc_df

In [None]:
# Display unacc_df again.
unacc_df

In [None]:
# Move the group keys out of the index into ordinary columns.
# NOTE(review): this rebinds unacc_df to its own result, so the cell is not
# idempotent - running it a second time would call reset_index on the
# already-flat frame and add a spurious 'index' column.
unacc_df = unacc_df.reset_index()

In [None]:
# Display unacc_df in its current state.
unacc_df

In [None]:
# Summary of unacc_df: column names, non-null counts, dtypes and memory use.
unacc_df.info()