# Pharmaceutical Demand Forecasting
End-to-end workflow for forecasting pharmaceutical demand using Prophet.
## Steps:
1. Preprocess data (simulating AWS Glue)
2. Train Prophet model
3. Evaluate model performance
4. Simulate AWS integration (Glue job and SageMaker deployment)
5. Generate D3.js visualization

In [None]:
import sys
sys.path.append('../src')
import pandas as pd
from sklearn.metrics import mean_absolute_error, mean_squared_error, f1_score
import numpy as np
from data_preprocessing import preprocess_data
from model_training import train_and_forecast
from aws_integration import simulate_aws_glue_job, simulate_sagemaker_deployment
import streamlit.components.v1 as components

# Step 1: Preprocess data
# Arguments are presumably (raw input CSV, cleaned output CSV) -- confirm against
# data_preprocessing.preprocess_data. The second path is the file that the
# training, evaluation and visualization cells below read.
preprocess_data('../data/sample_demand_data.csv', '../data/cleaned_demand_data.csv')

In [None]:
# Step 2: Train and forecast
# `model` is handed to the SageMaker deployment simulation in Step 4 and
# `forecast` is scored against the historical data in Step 3.
# The second path is presumably where the forecast is saved (Step 5 reads
# '../data/forecast_output.csv') -- confirm against model_training.train_and_forecast.
model, forecast = train_and_forecast('../data/cleaned_demand_data.csv', '../data/forecast_output.csv')

In [None]:
# Step 3: Evaluate model performance
def evaluate_model(data_path, forecast):
    """Score a forecast against the historical demand stored in a CSV file.

    The historical rows and the forecast rows are joined on the ``ds`` date
    column (inner join), so only dates present in both are evaluated.

    Args:
        data_path: Path to a CSV file with at least ``ds`` (date) and ``y``
            (observed value) columns.
        forecast: DataFrame with at least ``ds`` and ``yhat`` (predicted
            value) columns. It is not modified.

    Returns:
        Tuple ``(mae, mse, rmse, f1)``. ``f1`` is computed after turning both
        the observed and predicted values into binary labels using the median
        of the full historical ``y`` column as the threshold.

    Raises:
        ValueError: If the historical data and the forecast share no dates.
    """
    # Load historical data
    df = pd.read_csv(data_path)

    # Take copies of just the needed columns *before* converting dates, so the
    # datetime conversion never writes into the caller's forecast DataFrame.
    actual = df[['ds', 'y']].copy()
    predicted = forecast[['ds', 'yhat']].copy()

    # Ensure both 'ds' columns share a datetime dtype so the merge keys match
    actual['ds'] = pd.to_datetime(actual['ds'])
    predicted['ds'] = pd.to_datetime(predicted['ds'])

    # Merge forecast with actual data on 'ds' (date column)
    merged = pd.merge(actual, predicted, on='ds', how='inner')
    if merged.empty:
        # Fail early with an actionable message instead of letting the metric
        # functions choke on zero samples.
        raise ValueError(
            f"No overlapping 'ds' dates between {data_path!r} and the forecast; "
            "cannot evaluate the model."
        )

    # Calculate regression metrics
    mae = mean_absolute_error(merged['y'], merged['yhat'])
    mse = mean_squared_error(merged['y'], merged['yhat'])
    rmse = np.sqrt(mse)

    # Calculate F1 score for binary classification
    # Use median demand (over all historical rows, not only the merged ones)
    # as threshold to convert to binary labels
    threshold = df['y'].median()
    actual_binary = (merged['y'] > threshold).astype(int)
    predicted_binary = (merged['yhat'] > threshold).astype(int)
    f1 = f1_score(actual_binary, predicted_binary)

    print(f'Mean Absolute Error (MAE): {mae:.2f}')
    print(f'Mean Squared Error (MSE): {mse:.2f}')
    print(f'Root Mean Squared Error (RMSE): {rmse:.2f}')
    print(f'F1 Score (binary classification, threshold={threshold:.2f}): {f1:.2f}')

    return mae, mse, rmse, f1

# Evaluate the model
# Scores the Step 2 `forecast` against the cleaned historical data; the metrics
# are printed by evaluate_model and also kept here as notebook variables.
mae, mse, rmse, f1 = evaluate_model('../data/cleaned_demand_data.csv', forecast)

In [None]:
# Step 4: Simulate AWS integration
# The Glue job simulation is given the same raw/cleaned CSV paths as the
# preprocessing call in Step 1.
# NOTE(review): it may rewrite the cleaned file -- confirm in aws_integration.
simulate_aws_glue_job('../data/sample_demand_data.csv', '../data/cleaned_demand_data.csv')
# Pass the model returned by train_and_forecast (Step 2) to the deployment simulation.
simulate_sagemaker_deployment(model)

In [None]:
# Step 5: Generate D3.js visualization
def create_d3_visualization(data_path, forecast_path):
    """Build a D3.js line chart comparing actual and forecast values.

    Writes two files and then renders the chart through the Streamlit
    ``components.html`` helper:

    * ``../data/d3_data.csv`` -- actual and forecast rows stacked into
      ``ds`` / ``value`` / ``type`` columns (``type`` is ``'actual'`` or
      ``'forecast'``), with ``ds`` formatted as ``YYYY-MM-DD``.
    * ``../visualizations/d3_visualization.html`` -- a standalone page that
      loads that CSV with ``d3.csv`` and draws one line plus dots per type.

    Args:
        data_path: CSV with at least ``ds`` and ``y`` columns (actual values).
        forecast_path: CSV with at least ``ds`` and ``yhat`` columns.
    """
    # Local import: keeps this cell self-contained without touching the
    # notebook's import cell.
    from pathlib import Path

    d3_data_path = Path('../data/d3_data.csv')
    html_path = Path('../visualizations/d3_visualization.html')

    # Load data
    df = pd.read_csv(data_path)
    forecast = pd.read_csv(forecast_path)

    # Prepare data for D3.js: one long table with a 'type' discriminator
    df['type'] = 'actual'
    forecast['type'] = 'forecast'
    df = df[['ds', 'y', 'type']].rename(columns={'y': 'value'})
    forecast = forecast[['ds', 'yhat', 'type']].rename(columns={'yhat': 'value'})
    combined = pd.concat([df, forecast]).reset_index(drop=True)
    # Normalise dates to the exact format parsed by d3.timeParse('%Y-%m-%d') below
    combined['ds'] = pd.to_datetime(combined['ds']).dt.strftime('%Y-%m-%d')
    # Make sure the output folders exist so a fresh checkout does not fail here
    d3_data_path.parent.mkdir(parents=True, exist_ok=True)
    combined.to_csv(d3_data_path, index=False)

    # Generate D3.js HTML
    # `fill: none` on the line classes is required: SVG paths are filled black
    # by default, which would turn each line into a solid shape.
    # NOTE(review): the page fetches '../data/d3_data.csv' relative to its own
    # location; this may not resolve when the markup is embedded via
    # components.html or opened from file:// -- confirm in the target environment.
    d3_html = '''
<!DOCTYPE html>
<html>
<head>
    <script src="https://d3js.org/d3.v7.min.js"></script>
    <style>
        .line-actual { fill: none; stroke: steelblue; stroke-width: 2; }
        .line-forecast { fill: none; stroke: orange; stroke-width: 2; stroke-dasharray: 5,5; }
        .dot-actual { fill: steelblue; }
        .dot-forecast { fill: orange; }
    </style>
</head>
<body>
    <svg width="800" height="400"></svg>
    <script>
        d3.csv('../data/d3_data.csv').then(function(data) {
            data.forEach(d => {
                d.ds = d3.timeParse('%Y-%m-%d')(d.ds);
                d.value = +d.value;
            });

            const margin = {top: 20, right: 20, bottom: 30, left: 50};
            const width = 800 - margin.left - margin.right;
            const height = 400 - margin.top - margin.bottom;

            const svg = d3.select('svg')
                .append('g')
                .attr('transform', `translate(${margin.left},${margin.top})`);

            const x = d3.scaleTime()
                .domain(d3.extent(data, d => d.ds))
                .range([0, width]);

            const y = d3.scaleLinear()
                .domain([0, d3.max(data, d => d.value)])
                .range([height, 0]);

            svg.append('g')
                .attr('transform', `translate(0,${height})`)
                .call(d3.axisBottom(x));

            svg.append('g')
                .call(d3.axisLeft(y));

            const line = d3.line()
                .x(d => x(d.ds))
                .y(d => y(d.value));

            const dataByType = d3.group(data, d => d.type);

            svg.selectAll('.line')
                .data(dataByType)
                .enter()
                .append('path')
                .attr('class', d => `line-${d[0]}`)
                .attr('d', d => line(d[1]));

            svg.selectAll('.dot')
                .data(data)
                .enter()
                .append('circle')
                .attr('class', d => `dot-${d.type}`)
                .attr('cx', d => x(d.ds))
                .attr('cy', d => y(d.value))
                .attr('r', 3);
        });
    </script>
</body>
</html>
    '''
    html_path.parent.mkdir(parents=True, exist_ok=True)
    # Explicit encoding so the written page does not depend on the platform default
    with open(html_path, 'w', encoding='utf-8') as f:
        f.write(d3_html)

    # Display in notebook (for Streamlit, this is handled in app.py)
    components.html(d3_html, height=400)

# Generate visualization
# Uses the cleaned historical CSV and the forecast CSV whose path was passed to
# train_and_forecast in Step 2.
create_d3_visualization('../data/cleaned_demand_data.csv', '../data/forecast_output.csv')