<a href="https://colab.research.google.com/github/AishwaryaKannan02/pharmacy-benefits-decision-support/blob/main/Pharmacy_PA_predictor_AI_Agent.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.
import kagglehub
# Fetch the two dataset sources and the model source by their Kaggle handles.
# Each call's return value is kept in a *_path variable; presumably this is the
# local location of the downloaded files -- confirm against the kagglehub docs.
# NOTE(review): none of these *_path variables is read by the other cells in
# this notebook; later cells use hard-coded /kaggle/... paths instead.
aishwaryakannan02_phrmacy_prior_auth_dataset_path = kagglehub.dataset_download('aishwaryakannan02/phrmacy-prior-auth-dataset')
aishwaryakannan02_pharmacy_prior_auth_dataset1_path = kagglehub.dataset_download('aishwaryakannan02/pharmacy-prior-auth-dataset1')
aishwaryakannan02_prior_auth_model_scikitlearn_default_1_path = kagglehub.model_download('aishwaryakannan02/prior-auth-model/ScikitLearn/default/1')

print('Data source import complete.')


# Pharmacy Prior Authorization Predictor – End-to-End Notebook
This notebook includes:
1. Dataset creation
2. Model training
3. Model saving (PKL)
4. Architecture diagram
5. Multi-agent blogging system
6. Final project execution


In [None]:
# 1. Imports and configuration
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
import joblib
from IPython.display import Markdown, display

# Ensure the output folder exists. Later cells write the model (.pkl) and the
# generated blog (.md) to /kaggle/working; exist_ok=True makes this a no-op
# when the folder is already there, so the cell is safe to re-run.
os.makedirs('/kaggle/working', exist_ok=True)


In [None]:
# 2. Load dataset from Kaggle input (update path if needed)
DATASET_FILENAME = 'pharmacy_prior_auth_dataset.csv'
DATASET_PATH = '/kaggle/input/pharmacy-prior-auth-dataset1/' + DATASET_FILENAME

# Outside Kaggle (e.g. Colab) /kaggle/input does not exist. If the kagglehub
# download cell was run, try the directory it returned instead.
# NOTE(review): assumes the CSV sits at the top level of the downloaded
# dataset directory -- the existence check keeps the default path otherwise.
if not os.path.exists(DATASET_PATH):
    _hub_dir = globals().get('aishwaryakannan02_pharmacy_prior_auth_dataset1_path')
    if _hub_dir:
        _candidate = os.path.join(_hub_dir, DATASET_FILENAME)
        if os.path.exists(_candidate):
            DATASET_PATH = _candidate

print("Loading dataset from:", DATASET_PATH)
df = pd.read_csv(DATASET_PATH)
print("Dataset shape:", df.shape)
print("Columns:", df.columns.tolist())
df.head()


In [None]:
# 3. Sanity checks
print("Missing values per column:")
print(df.isna().sum())

# The training cell needs a 'label' target column. Raise explicitly instead
# of using assert: assert statements are removed when Python runs with -O,
# which would let a malformed dataset through to training.
if 'label' not in df.columns:
    raise ValueError("Expected target column 'label' not found.")


In [None]:
# 4. Prepare features & target and train a baseline model
# Every column except 'label' is used as a feature, in DataFrame column order.
# NOTE(review): columns are passed to the model as-is, with no encoding or
# imputation -- assumes all feature columns are numeric and complete; confirm.
FEATURE_COLS = [c for c in df.columns if c != 'label']
TARGET_COL = 'label'

X = df[FEATURE_COLS]
y = df[TARGET_COL]

# Train/test split: 20% held out, stratified on the target so class
# proportions are preserved in both parts; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Choose model - baseline RandomForest (works well on numeric features)
# 100 trees, fixed seed, n_jobs=-1 to use all available cores.
model = RandomForestClassifier(n_estimators=100, random_state=42, n_jobs=-1)
model.fit(X_train, y_train)

# Evaluate on the held-out split only.
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))


In [None]:
# 5. Save trained model to /kaggle/working/
# The fitted estimator is serialized with joblib; the inference cell later
# reloads it from this same path.
MODEL_PATH = '/kaggle/working/prior_auth_model.pkl'
joblib.dump(model, MODEL_PATH)
print("Saved model to:", MODEL_PATH)


In [None]:
# 6. Architecture (text diagram) — printed for reference
# Raw string so the diagram is printed exactly as laid out below.
# The flow arrows are the Unicode downwards arrow (U+2193).
architecture = r"""
===============================================================
        PHARMACY PRIOR AUTHORIZATION PREDICTION SYSTEM
===============================================================

  Data (Kaggle input)
       ↓
  Preprocessing (numeric features)
       ↓
  ML Model Layer (RandomForest)
       ↓
  Model Persistence (PKL)  --> /kaggle/working/prior_auth_model.pkl
       ↓
  Inference (Notebook / API)
       ↓
  Output: Predictions & Explanation

  Parallel: Multi-Agent Blog Generator
  Planner -> Writer -> Editor -> SocialMediaWriter -> Save Markdown
===============================================================
"""
print(architecture)


In [None]:
# 7. Multi-Agent Blogging system (simple, no external LLMs)
# These are deterministic helper agents to generate a tech blog from context.

class Planner:
    """Planning agent: supplies the fixed section outline for the blog post."""

    # Section headings, in the order they appear in the generated post.
    _SECTIONS = (
        "Project Overview",
        "Problem Statement",
        "Solution Summary",
        "Dataset",
        "Model & Training",
        "Architecture",
        "Agent System",
        "Results",
        "How to Run",
        "Conclusion",
    )

    def __init__(self):
        self.name = "RobustBlogPlanner"

    def create_outline(self, topic, context=None, metadata=None):
        """Return the list of section headings for a post.

        ``topic``, ``context`` and ``metadata`` are accepted for interface
        compatibility but do not influence the result. A new list is
        returned on every call, so callers may mutate it freely.
        """
        return list(self._SECTIONS)

class Writer:
    """Writing agent: renders an outline into a Markdown draft."""

    def __init__(self):
        self.name = "RobustBlogWriter"

    def write_from_outline(self, outline, context=None, metadata=None):
        """Build the Markdown draft for the given outline.

        Args:
            outline: iterable of section heading strings.
            context: optional dict; ``n_rows`` and ``features`` are read for
                the "Dataset" section. ``None`` is treated as empty.
            metadata: optional dict; ``title`` and ``author`` are read for
                the header. ``None`` is treated as empty.

        Returns:
            The draft as one string, with the parts joined by newlines.
        """
        # Both arguments default to None, so normalize before calling .get().
        context = context or {}
        metadata = metadata or {}

        parts = [f"# {metadata.get('title','Pharmacy Prior Authorization Predictor')}\n"]
        if metadata.get('author'):
            parts.append(f"*Author: {metadata['author']}*\n")
        parts.append("Auto-generated technical blog post (synthesized).\n\n")
        # Add each section with a short autogenerated paragraph
        for section in outline:
            parts.append(f"## {section}\n")
            if section == "Dataset":
                # Column labels are not guaranteed to be strings (e.g. integer
                # column names), so convert before joining.
                features = ', '.join(str(name) for name in (context.get('features') or []))
                parts.append(f"The dataset contains {context.get('n_rows', '?')} rows and features: {features}.\n\n")
            elif section == "Model & Training":
                parts.append("A RandomForestClassifier was trained as a baseline. See saved model for reproduction.\n\n")
            elif section == "Agent System":
                parts.append("A lightweight multi-agent pipeline (Planner, Writer, Editor, Social Media Writer) automates blog generation.\n\n")
            else:
                parts.append(f"Content auto-generated for section: {section}.\n\n")
        return "\n".join(parts)

class Editor:
    """Editing agent: appends a fixed editorial footer to a draft."""

    def __init__(self):
        self.name = "BlogEditor"

    def edit(self, text):
        """Return ``text`` followed by a horizontal rule and the editorial note."""
        separator = "\n---\n"
        note = "*Editorial note: This post was generated automatically.*\n"
        return text + separator + note

class SocialMediaWriter:
    """Social agent: derives short promotional snippets from a Markdown post."""

    def __init__(self):
        self.name = "SocialMediaWriter"

    def create_snippets(self, markdown_text):
        """Return ``{"twitter": ..., "linkedin": ...}`` built from the post.

        The first non-blank line is used as the headline (with a fallback
        when the text has none); the next five non-blank lines, joined by
        spaces and cut to 250 characters, form the teaser. The twitter
        snippet is capped at 280 characters overall.
        """
        non_blank = [line for line in markdown_text.splitlines() if line.strip()]
        if non_blank:
            headline = non_blank[0]
        else:
            headline = "Pharmacy PA Predictor"
        teaser = " ".join(non_blank[1:6])[:250]

        tweet = f"{headline} - {teaser}"[:280]
        linkedin_post = f"{headline}\n\n{teaser}"
        return {"twitter": tweet, "linkedin": linkedin_post}

# Orchestrator (BloggerAgent)
class BloggerAgent:
    """Runs the planner -> writer -> editor -> social pipeline end to end."""

    def __init__(self, planner, writer, editor, social):
        self.name = "BloggerAgent"
        self.planner, self.writer = planner, writer
        self.editor, self.social = editor, social

    def generate(self, topic, context=None, metadata=None, save_path=None):
        """Generate a post for ``topic`` and optionally write it to disk.

        ``metadata`` is replaced by an empty dict when falsy for both the
        planner and the writer; ``context`` is passed to the planner as
        given and to the writer with the same empty-dict substitution.
        When ``save_path`` is truthy the edited text is written there as
        UTF-8.

        Returns a dict with keys ``title``, ``outline``, ``draft``,
        ``edited``, ``social`` and ``path``.
        """
        outline = self.planner.create_outline(
            topic,
            context=context,
            metadata=metadata or {},
        )
        draft = self.writer.write_from_outline(
            outline,
            context=context or {},
            metadata=metadata or {},
        )
        edited = self.editor.edit(draft)
        snippets = self.social.create_snippets(edited)

        if save_path:
            with open(save_path, "w", encoding="utf-8") as out_file:
                out_file.write(edited)

        return dict(
            title=topic,
            outline=outline,
            draft=draft,
            edited=edited,
            social=snippets,
            path=save_path,
        )

# Instantiate agents
# One instance of each agent is created at module level; the dashboard cell
# looks these names up in globals() and calls them directly.
planner = Planner()
writer = Writer()
editor = Editor()
social_writer = SocialMediaWriter()
# The orchestrator receives the four worker agents in pipeline order.
blogger_agent = BloggerAgent(planner, writer, editor, social_writer)

print("Multi-agent components created:", planner.name, writer.name, editor.name, social_writer.name, blogger_agent.name)


In [None]:
# 8. Agent Execution Status Dashboard
def md(text):
    """Render ``text`` as Markdown in the notebook output."""
    display(Markdown(text))


md("### 🔵 PHARMACY AI AGENT STATUS DASHBOARD")

# 1) Check creation: record, for each expected agent variable, whether it
# exists in the notebook's global namespace.
md("#### 1️⃣ Agent Creation Check")
agents_found = []
for a_name in ['planner', 'writer', 'editor', 'social_writer', 'blogger_agent']:
    exists = a_name in globals()
    agents_found.append((a_name, exists))
    status = '✔️' if exists else '❌ NOT FOUND'
    md(f"- **{a_name}**: {status}")

# 2) Registration simulation (we don't have Crew/crewai here; show explicit list)
# Only agents that passed the creation check are listed, so a missing agent
# is reported above instead of raising NameError here.
md("#### 2️⃣ Agent Registration Check (simulated)")
registered = [globals()[found_name].name for found_name, found in agents_found if found]
md("**Registered Agents:** " + ", ".join(registered))

# 3) Test single agent response
md("#### 3️⃣ Test Agent Response")
try:
    sample_outline = planner.create_outline("Test Topic")
    md("Planner produced outline (first 3 sections): " + ", ".join(sample_outline[:3]))
    md("Writer sample preview:\n\n" + writer.write_from_outline(sample_outline, context={'n_rows':len(df), 'features':FEATURE_COLS}, metadata={'title':'Test','author':'Auto'} )[:400] + "...")
    md("✔️ Agents responding correctly")
except Exception as e:
    # Dashboard boundary: report the failure in the output rather than abort.
    md(f"❌ Agent response failure: {e}")

# 4) Execute full workflow and preview blog
md("#### 4️⃣ Full Workflow Execution & Blog Generation")
try:
    context = {'n_rows': df.shape[0], 'features': FEATURE_COLS}
    metadata = {'title': 'Pharmacy Prior Authorization Predictor', 'author': 'AutoGen'}
    blog_save_path = '/kaggle/working/pharmacy_pa_blog.md'
    result = blogger_agent.generate(metadata['title'], context=context, metadata=metadata, save_path=blog_save_path)
    md("✔️ Workflow executed successfully.")
    md("**Blog saved to:** `" + blog_save_path + "`")
    md("**Blog preview:**\n\n" + result['edited'][:800] + "...\n")
    md("**Social snippets:**\n\n- Twitter: `" + result['social']['twitter'] + "`\n\n- LinkedIn: `" + result['social']['linkedin'][:300] + "`")
except Exception as e:
    # Dashboard boundary: report the failure in the output rather than abort.
    md("❌ Workflow execution failed: " + str(e))

md("### 🎉 AGENT STATUS CHECK COMPLETE")


In [None]:
# 9. Inference example: load saved model from /kaggle/working and predict on first test row
MODEL_PATH = '/kaggle/working/prior_auth_model.pkl'
loaded_model = joblib.load(MODEL_PATH)
print("Loaded model from", MODEL_PATH)

# Double brackets keep the first test row as a one-row DataFrame.
sample_X = X_test.iloc[[0]]
pred = loaded_model.predict(sample_X)[0]

# Class probabilities are optional: only estimators exposing predict_proba
# provide them.
proba = None
if hasattr(loaded_model, "predict_proba"):
    proba = loaded_model.predict_proba(sample_X)

print("Sample prediction:", pred)
if proba is not None:
    print("Probabilities:", proba)


In [None]:
# 10. Output files and summary
# List whatever the earlier cells wrote to the working folder, one entry per
# line, in the order the operating system returns them.
print("\nFiles saved in /kaggle/working/:")
for fname in os.listdir('/kaggle/working'):
    print(f"- {fname}")

print("\nNotebook execution complete. Model and blog generated successfully.")
