## lab-01

In [3]:
import numpy as np
import pandas as pd
from datetime import datetime, timedelta

class BaseParameters:
    def __init__(self):
        # Conversion rates between stages
        self.conversion_rates = {
            'signup_to_appointment': 0.75,  # 75% of signups book appointments
            'appointment_to_prescription': 0.80,  # 80% of appointments get prescriptions
            'prescription_to_delivery': 0.95,  # 95% delivery acceptance
            'delivery_to_refill': 0.85  # 85% initial refill rate
        }
        
        # Time delays (in days)
        self.stage_delays = {
            'signup_to_appointment': 3,  # Average days between signup and appointment
            'appointment_to_prescription': 1,  # Days from appointment to prescription
            'prescription_to_delivery': 2,  # Days from prescription to delivery
            'delivery_to_refill': 30  # Days between refills
        }
        
        # Seasonality patterns (monthly multipliers)
        self.seasonality = {
            1: 1.5,   # January spike
            2: 1.2,   # February
            3: 1.0,   # March
            4: 0.9,   # April
            5: 0.8,   # May
            6: 0.7,   # June
            7: 0.7,   # July
            8: 0.8,   # August
            9: 1.0,   # September
            10: 0.9,  # October
            11: 0.8,  # November
            12: 0.9   # December
        }
        
        # Daily patterns (weekday multipliers)
        self.daily_patterns = {
            0: 0.7,  # Monday
            1: 1.0,  # Tuesday
            2: 1.0,  # Wednesday
            3: 1.0,  # Thursday
            4: 0.8,  # Friday
            5: 0.4,  # Saturday
            6: 0.3   # Sunday
        }
        
    def get_seasonal_multiplier(self, date):
        """Get combined seasonal and daily multiplier for a given date"""
        month_mult = self.seasonality[date.month]
        day_mult = self.daily_patterns[date.weekday()]
        return month_mult * day_mult

In [4]:
import numpy as np
import pandas as pd
from datetime import datetime, timedelta

class VolumeForecaster:
    def __init__(self, base_params, base_daily_signups=100):
        self.base_params = base_params
        self.base_daily_signups = base_daily_signups
        
    def generate_daily_signups(self, start_date, periods=730):  # 24 months
        dates = pd.date_range(start=start_date, periods=periods, freq='D')
        signups = []
        
        for date in dates:
            daily_volume = self.base_daily_signups * self.base_params.get_seasonal_multiplier(date)
            # Add random noise (±10%)
            noise = np.random.normal(1, 0.1)
            signups.append(max(0, daily_volume * noise))
            
        return pd.DataFrame({
            'date': dates,
            'signups': signups
        })
    
    def calculate_stage_volumes(self, signup_df):
        """Calculate volumes for each stage in the customer journey"""
        df = signup_df.copy()
        
        # Appointments
        df['appointments'] = df['signups'].shift(
            self.base_params.stage_delays['signup_to_appointment']
        ) * self.base_params.conversion_rates['signup_to_appointment']
        
        # Prescriptions
        df['prescriptions'] = df['appointments'].shift(
            self.base_params.stage_delays['appointment_to_prescription']
        ) * self.base_params.conversion_rates['appointment_to_prescription']
        
        # Initial deliveries
        df['deliveries'] = df['prescriptions'].shift(
            self.base_params.stage_delays['prescription_to_delivery']
        ) * self.base_params.conversion_rates['prescription_to_delivery']
        
        return df.fillna(0)

In [5]:
class ConfidenceIntervals:
    def __init__(self, base_uncertainty=0.1, uncertainty_growth=0.01):
        self.base_uncertainty = base_uncertainty
        self.uncertainty_growth = uncertainty_growth
        
    def calculate_intervals(self, forecast_df):
        """Calculate confidence intervals that widen over time"""
        df = forecast_df.copy()
        
        for column in ['signups', 'appointments', 'prescriptions', 'deliveries']:
            if column in df.columns:
                days = np.arange(len(df))
                uncertainty = self.base_uncertainty + (days * self.uncertainty_growth)
                
                df[f'{column}_lower'] = df[column] * (1 - uncertainty)
                df[f'{column}_upper'] = df[column] * (1 + uncertainty)
                
        return df.clip(lower=0)  # Ensure no negative values

In [6]:
class RetentionCalculator:
    def __init__(self, base_params, monthly_churn_rate=0.15):
        self.base_params = base_params
        self.monthly_churn_rate = monthly_churn_rate
        
    def calculate_refills(self, forecast_df):
        """Calculate refill volumes with linear retention decay"""
        df = forecast_df.copy()
        refill_delay = self.base_params.stage_delays['delivery_to_refill']
        initial_refill_rate = self.base_params.conversion_rates['delivery_to_refill']
        
        # Initialize refills column
        df['refills'] = 0.0
        
        # Calculate refills with decay
        for i in range(len(df)):
            if i >= refill_delay:
                retention_months = (i - refill_delay) / 30  # Approximate months since first delivery
                current_retention = max(0, initial_refill_rate * (1 - retention_months * self.monthly_churn_rate))
                df.loc[i, 'refills'] = df.loc[i - refill_delay, 'deliveries'] * current_retention
                
        return df

In [7]:
class HealthcareForecaster:
    def __init__(self, base_daily_signups=100, monthly_churn_rate=0.15):
        self.base_params = BaseParameters()
        self.volume_forecaster = VolumeForecaster(self.base_params, base_daily_signups)
        self.confidence_calculator = ConfidenceIntervals()
        self.retention_calculator = RetentionCalculator(self.base_params, monthly_churn_rate)
        
    def generate_forecast(self, start_date):
        # Generate base volumes
        forecast = self.volume_forecaster.generate_daily_signups(start_date)
        
        # Calculate stage volumes
        forecast = self.volume_forecaster.calculate_stage_volumes(forecast)
        
        # Add refills
        forecast = self.retention_calculator.calculate_refills(forecast)
        
        # Calculate confidence intervals
        forecast = self.confidence_calculator.calculate_intervals(forecast)
        
        return forecast

# Example usage
if __name__ == "__main__":
    forecaster = HealthcareForecaster()
    forecast = forecaster.generate_forecast(datetime.now())
    print(forecast.head())

TypeError: Invalid comparison between dtype=datetime64[ns] and int

In [None]:
# Test implementation

# Test results show expected behavior with:
#  January spike visible
#  Stage-to-stage conversion patterns
#  Widening confidence intervals
#  Growing refill volume


import matplotlib.pyplot as plt

# Initialize forecaster
forecaster = HealthcareForecaster(base_daily_signups=100)
start_date = datetime(2024, 1, 1)
forecast = forecaster.generate_forecast(start_date)

# Plot results
plt.figure(figsize=(15, 8))
for col in ['signups', 'appointments', 'prescriptions', 'deliveries', 'refills']:
    plt.plot(forecast['date'], forecast[col], label=col)
    plt.fill_between(forecast['date'], 
                     forecast[f'{col}_lower'], 
                     forecast[f'{col}_upper'], 
                     alpha=0.2)

plt.title('24-Month Healthcare Customer Journey Forecast')
plt.xlabel('Date')
plt.ylabel('Volume')
plt.legend()
plt.grid(True)
plt.show()

# Display summary statistics
print("\nSummary Statistics:")
print(forecast.describe())