In [9]:
import pandas as pd

# Read in the 'Discharge' dataset
discharge = pd.read_csv('discharge.csv')

# Calculate outpatient duration for each patient
discharge['outpt_dur'] = discharge.filter(like='NumberDays').max(axis=1)
discharge.loc[discharge.filter(like='NumberDays').isnull().all(axis=1) & (discharge['Prescribed1_422345'] == 'Yes'), 'outpt_dur'] = None
discharge.loc[discharge.filter(like='NumberDays').isnull().all(axis=1) & (discharge['Prescribed1_422345'] == 'No'), 'outpt_dur'] = 0

# Read in the 'Inpatient' dataset
inpatient = pd.read_csv('inpatient.csv')

# Convert date column to datetime format
inpatient['day'] = pd.to_datetime(inpatient['day'])

# Group by patient and count the unique dates to calculate inpatient duration
inpt_dur = inpatient.groupby(['nid'])['day'].nunique().reset_index()
inpt_dur = inpt_dur.rename(columns={'day': 'inpt_dur'})

# Merge the outpatient and inpatient duration data on nid
dat2 = discharge.merge(inpt_dur, left_on='nid', right_on='nid', how='left')
dat2['total_dur'] = dat2['outpt_dur'].fillna(0) + dat2['inpt_dur'].fillna(0)

dat2

Unnamed: 0,nid,Prescribed1_422345,AntibioticName1_422345,NumberDays1_422345,AntibioticName2_422345,NumberDays2_422345,AntibioticName3_422345,NumberDays3_422345,AntibioticName4_422345,NumberDays4_422345,AntibioticName5_422345,NumberDays5_422345,outpt_dur,inpt_dur,total_dur
0,24397,No,,,,,,,,,,,0.0,8.0,8.0
1,24398,Yes,Amoxicillin-Clavulanic Acid (Augmentin),7.0,,,,,,,,,7.0,2.0,9.0
2,24399,Yes,"Cefuroxime (Ceftin, Kefurox, Zinacef)",7.0,,,,,,,,,7.0,6.0,13.0
3,24400,Yes,"Doxycycline (Doxycycline hyclate, Doxy, Vibra,...",10.0,Amoxicillin-Clavulanic Acid (Augmentin),4.0,,,,,,,10.0,4.0,14.0
4,24401,Yes,"Levofloxacin (Levaquin, Quixin)",7.0,,,,,,,,,7.0,3.0,10.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5354,30650,No,,,,,,,,,,,0.0,,0.0
5355,30653,Yes,"Levofloxacin (Levaquin, Quixin)",3.0,,,,,,,,,3.0,6.0,9.0
5356,30654,Yes,"Ciprofloxacin (Cipro, Ciproxin, Ciprobay)",3.0,,,,,,,,,3.0,6.0,9.0
5357,30656,,,,,,,,,,,,,5.0,5.0


In [10]:
dat2 = dat2.merge(inpatient, left_on='nid', right_on='nid', how='inner')

# Extract quarter and year from the discharge date
dat2['quarter'] = pd.PeriodIndex(pd.to_datetime(dat2['discharge_date']), freq='Q')
dat2['year'] = pd.DatetimeIndex(dat2['discharge_date']).year

# Calculate mean total duration by quarter and year
mean_dur = dat2.groupby(['year', 'quarter'])['total_dur'].mean().reset_index()

# Pivot table to show mean total duration by quarter and year
mean_dur_table = pd.pivot_table(mean_dur, values='total_dur', index=['year'], columns=['quarter'], aggfunc='mean')

mean_dur_table

quarter,2015Q4,2016Q1,2016Q2,2016Q3,2016Q4,2017Q1,2017Q2,2017Q4,2018Q1
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2015,9.636544,,,,,,,,
2016,,9.824262,9.770786,9.291275,9.316965,,,,
2017,,,,,,9.412158,4.4,18.0,
2018,,,,,,,,,7.24


In [7]:
# Extract quarter and year from the discharge date
dat2['quarter'] = pd.PeriodIndex(pd.to_datetime(dat2['discharge_date']), freq='Q')
dat2['year'] = pd.DatetimeIndex(dat2['discharge_date']).year

# Calculate mean total duration by quarter and year
mean_dur = dat2.groupby(['year', 'quarter'])['total_dur'].mean().reset_index()
mean_dur['quarter'] = mean_dur['quarter'].astype(str)

# Pivot table to show mean total duration by quarter and year
mean_dur_table = pd.pivot_table(mean_dur, values='total_dur', columns=['quarter'], aggfunc='mean')

mean_dur

Unnamed: 0,year,quarter,total_dur
0,2015,2015Q4,9.636544
1,2016,2016Q1,9.824262
2,2016,2016Q2,9.770786
3,2016,2016Q3,9.291275
4,2016,2016Q4,9.316965
5,2017,2017Q1,9.412158
6,2017,2017Q2,4.4
7,2017,2017Q4,18.0
8,2018,2018Q1,7.24


SQl

-- Calculate outpatient antibiotic duration for each patient
WITH output_dur AS (
  SELECT 
  nid, 
    COALESCE(
      GREATEST(
        NumberDays1_422345,
        NumberDays2_422345,
        NumberDays3_422345,
        NumberDays4_422345,
        NumberDays5_422345
      ),
      CASE
        WHEN Prescribed1_422345 = 'Yes' THEN NULL
        WHEN Prescribed1_422345 = 'No' THEN 0
      END
    ) AS output_dur
  FROM discharge
),
-- Calculate inpatient antibiotic duration for each patient
inpt_dur AS (
  SELECT
    nid, discharge_date,
    COUNT(DISTINCT day) AS inpt_dur
  FROM inpatient
  GROUP BY nid, discharge_date
),

-- Merge the new calculated values of ‘outpt_dur’ and ‘inpt_dur’ to the dataset ‘dat1’
dat1 AS (
  SELECT
    output_dur.nid,
    COALESCE(output_dur.output_dur, 0) AS output_dur,
    COALESCE(inpt_dur.inpt_dur, 0) AS inpt_dur,
    COALESCE(output_dur.output_dur, 0) + COALESCE(inpt_dur.inpt_dur, 0) AS total_dur
  FROM output_dur
  LEFT JOIN inpt_dur ON output_dur.nid = inpt_dur.nid
) 
-- Save the result as 'dat2'
SELECT * INTO dat2 FROM dat1;
