# In [10]:  (Jupyter notebook cell prompt left over from export)
import gradio as gr
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from selenium import webdriver
from selenium.webdriver.edge.service import Service
from selenium.webdriver.edge.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait, Select
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.microsoft import EdgeChromiumDriverManager
from datetime import datetime, timedelta
import time
import numpy as np
from selenium.common.exceptions import StaleElementReferenceException, TimeoutException, NoSuchElementException, WebDriverException
from io import BytesIO
from pptx import Presentation
from pptx.util import Inches
import tempfile
import logging
import traceback

# Set up logging: configure the root logger once at import time (INFO level,
# "timestamp - level - message" format) and create the module-level logger
# used by every function in this file.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# --- Scraping Functions ---
def wait_for_element(driver, locator, timeout=20):
    """Block until the element at ``locator`` is clickable and return it.

    Args:
        driver: WebDriver instance to poll.
        locator: ``(By.<strategy>, value)`` tuple identifying the element.
        timeout: Maximum number of seconds to wait.

    Returns:
        The element once ``EC.element_to_be_clickable`` is satisfied.

    Raises:
        TimeoutException: Re-raised (after being logged) when the element
            does not become clickable within ``timeout`` seconds.
    """
    waiter = WebDriverWait(driver, timeout)
    clickable = EC.element_to_be_clickable(locator)
    try:
        element = waiter.until(clickable)
    except TimeoutException as e:
        logger.error(f"Timeout waiting for element {locator}: {str(e)}")
        raise
    return element

def select_date_month_day(driver, date_str, date_input_id):
    """Pick the month and day of ``date_str`` in a ``ui-datepicker`` popup.

    Only the month dropdown and the day cell are touched; the year part of
    ``date_str`` is parsed but never selected, so the picker keeps whatever
    year it is already showing.

    Args:
        driver: WebDriver instance showing the page.
        date_str: Date in ``YYYY-MM-DD`` format.
        date_input_id: DOM id of the input that opens the date picker.

    Raises:
        Exception: Any parsing or WebDriver error is logged and re-raised.
    """
    try:
        date_to_select = datetime.strptime(date_str, '%Y-%m-%d')
        logger.info(f"Selecting date {date_str} for {date_input_id}")
        date_input = wait_for_element(driver, (By.ID, date_input_id))
        date_input.click()
        month_select = wait_for_element(driver, (By.CLASS_NAME, 'ui-datepicker-month'))
        # The leading "." keeps the XPath relative to month_select. A bare
        # "//option" searches the whole document and can match an <option>
        # with the same value that belongs to a different <select>.
        # Option values are zero-based month indexes, hence "month - 1".
        month_option = month_select.find_element(By.XPATH, f".//option[@value='{date_to_select.month - 1}']")
        month_option.click()
        day = date_to_select.day
        day_element = wait_for_element(driver, (By.XPATH, f"//td[@data-handler='selectDay']/a[text()='{day}']"))
        day_element.click()
        # Fixed pause to let the page react to the new date.
        time.sleep(5)
    except Exception as e:
        logger.error(f"Error in select_date_month_day for {date_input_id}: {str(e)}")
        raise

def select_date(driver, date_str, date_input_id):
    """Pick the month, year and day of ``date_str`` in a ``ui-datepicker`` popup.

    Args:
        driver: WebDriver instance showing the page.
        date_str: Date in ``YYYY-MM-DD`` format.
        date_input_id: DOM id of the input that opens the date picker.

    Raises:
        Exception: Any parsing or WebDriver error is logged and re-raised.
    """
    try:
        date_to_select = datetime.strptime(date_str, '%Y-%m-%d')
        logger.info(f"Selecting date {date_str} for {date_input_id}")
        date_input = wait_for_element(driver, (By.ID, date_input_id))
        date_input.click()
        time.sleep(2)
        month_select = wait_for_element(driver, (By.CLASS_NAME, 'ui-datepicker-month'))
        # The leading "." keeps the XPath relative to the <select> it is
        # called on. A bare "//option" searches the whole document and can
        # match an <option> with the same value in a different <select>.
        # Option values are zero-based month indexes, hence "month - 1".
        month_option = month_select.find_element(By.XPATH, f".//option[@value='{date_to_select.month - 1}']")
        time.sleep(2)
        month_option.click()
        # Look the year dropdown up again after the month click rather than
        # reusing anything located before it.
        year_select = wait_for_element(driver, (By.CLASS_NAME, 'ui-datepicker-year'))
        year_option = year_select.find_element(By.XPATH, f".//option[@value='{date_to_select.year}']")
        year_option.click()
        day = date_to_select.day
        day_element = wait_for_element(driver, (By.XPATH, f"//td[@data-handler='selectDay']/a[text()='{day}']"))
        day_element.click()
        # Fixed pause to let the page react to the new date.
        time.sleep(5)
    except Exception as e:
        logger.error(f"Error in select_date for {date_input_id}: {str(e)}")
        raise

def extract_grid_data_clm_summary(driver):
    """Read every page of the ``jqgridClmSummbyProv`` grid into a list of dicts.

    The page count is read from the ``sp_1_pjqgridClmSummbyProv`` element; if
    that element times out a single page is assumed, and any other error while
    reading it yields an empty list. A failure on any page stops the paging
    loop and whatever has been collected so far is returned.

    Args:
        driver: WebDriver instance already showing the claim summary results.

    Returns:
        A list of dicts with keys ``'Provider Name'``, ``'No of Visits'``,
        ``'Total Claim'`` and ``'Total MC (Days)'`` (all raw cell text).
    """
    records = []
    try:
        logger.info("Extracting claim summary data")
        pager_total = wait_for_element(driver, (By.ID, "sp_1_pjqgridClmSummbyProv"))
        total_pages = int(pager_total.text.strip())
        logger.info(f"Total pages: {total_pages}")
    except TimeoutException:
        logger.warning("No pagination element found, assuming single page")
        total_pages = 1
    except Exception as e:
        logger.error(f"Error getting total pages: {str(e)}")
        return records

    current_page = 1
    while current_page <= total_pages:
        try:
            logger.info(f"Processing page {current_page}")
            time.sleep(3)
            driver.execute_script("window.scrollTo(0, 0);")
            # Result is not used; the call only waits until the grid is clickable.
            wait_for_element(driver, (By.ID, "jqgridClmSummbyProv"))
            row_wait = WebDriverWait(driver, 10)
            grid_rows = row_wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, "tr.jqgrow")))
            for grid_row in grid_rows:
                try:
                    name_text = grid_row.find_element(By.CSS_SELECTOR, "td[aria-describedby='jqgridClmSummbyProv_STR_ProvName']").text
                    visit_text = grid_row.find_element(By.CSS_SELECTOR, "td[aria-describedby='jqgridClmSummbyProv_STR_NoOfVisit']").text
                    claim_text = grid_row.find_element(By.CSS_SELECTOR, "td[aria-describedby='jqgridClmSummbyProv_STR_ClmAmt']").text
                    # The MC column may be absent; fall back to '0' in that case.
                    if grid_row.find_elements(By.CSS_SELECTOR, "td[aria-describedby='jqgridClmSummbyProv_STR_TotalMC']"):
                        mc_text = grid_row.find_element(By.CSS_SELECTOR, "td[aria-describedby='jqgridClmSummbyProv_STR_TotalMC']").text
                    else:
                        mc_text = '0'
                    record = {
                        'Provider Name': name_text,
                        'No of Visits': visit_text,
                        'Total Claim': claim_text,
                        'Total MC (Days)': mc_text,
                    }
                    records.append(record)
                except Exception as e:
                    logger.warning(f"Error extracting row on page {current_page}: {str(e)}")
                    continue
            if current_page < total_pages:
                forward_icon = wait_for_element(driver, (By.CSS_SELECTOR, "div.btn.btn-sm.btn-default span.fa.fa-forward"), timeout=10)
                driver.execute_script("arguments[0].scrollIntoView(true);", forward_icon)
                forward_icon.click()
                # The old first row going stale signals that the grid re-rendered.
                WebDriverWait(driver, 15).until(EC.staleness_of(grid_rows[0]))
        except Exception as e:
            logger.error(f"Error on page {current_page}: {str(e)}")
            break
        current_page += 1
    return records

def extract_grid_data_patient_analysis(driver):
    """Read every page of the ``jqgridCorpMcAnalysis`` grid into a list of dicts.

    Pages are walked by clicking the ``next_pjqgridCorpMcAnalysis`` pager
    button until its class contains ``disabled``. Each page gets up to
    ``max_retries`` attempts; when they are exhausted, or on an unexpected
    error, whatever has been collected so far is returned.

    Args:
        driver: WebDriver instance already showing the patient analysis results.

    Returns:
        A list of dicts with keys ``'Employee Name'``, ``'Employee No'``,
        ``'Division/Department'``, ``'Total Visit'``, ``'Total MC (Days)'``,
        ``'Total Claim (Own)'`` and ``'Total Claim (Dep)'`` (raw cell text).
    """
    all_data = []
    max_retries = 3
    retry_delay = 5

    def extract_row(row, max_row_retries=2):
        """Return one row as a dict, or ``None`` once every retry has failed."""
        for retry in range(max_row_retries):
            try:
                employee_name = row.find_element(By.CSS_SELECTOR, "td[aria-describedby='jqgridCorpMcAnalysis_MEM_NAME']").text
                employee_no = row.find_element(By.CSS_SELECTOR, "td[aria-describedby='jqgridCorpMcAnalysis_MEM_EMPID']").text
                division = row.find_element(By.CSS_SELECTOR, "td[aria-describedby='jqgridCorpMcAnalysis_MEM_EMPDIVISION']").text
                total_visit = row.find_element(By.CSS_SELECTOR, "td[aria-describedby='jqgridCorpMcAnalysis_totalVisit']").text
                total_mc = row.find_element(By.CSS_SELECTOR, "td[aria-describedby='jqgridCorpMcAnalysis_totalMC']").text
                total_claim_own = row.find_element(By.CSS_SELECTOR, "td[aria-describedby='jqgridCorpMcAnalysis_totalClaim_Own']").text
                total_claim_dep = row.find_element(By.CSS_SELECTOR, "td[aria-describedby='jqgridCorpMcAnalysis_totalClaim_Dep']").text
                return {
                    'Employee Name': employee_name, 'Employee No': employee_no, 'Division/Department': division,
                    'Total Visit': total_visit, 'Total MC (Days)': total_mc, 'Total Claim (Own)': total_claim_own,
                    'Total Claim (Dep)': total_claim_dep
                }
            except Exception as e:
                logger.warning(f"Error extracting patient row (Retry {retry + 1}/{max_row_retries}): {str(e)}")
                if retry + 1 == max_row_retries:
                    return None
                time.sleep(1)

    while True:
        # Rows before this index come from pages that were fully handled.
        page_start = len(all_data)
        for attempt in range(max_retries):
            # Drop rows gathered by a failed earlier attempt at this page so
            # that a retry cannot append the same rows a second time.
            # NOTE(review): after driver.refresh() the grid may reopen on its
            # first page rather than the failed one - confirm against the site.
            del all_data[page_start:]
            try:
                logger.info(f"Extracting patient analysis data (Attempt {attempt + 1}/{max_retries})")
                driver.execute_script("window.scrollTo(0, 0);")
                grid = wait_for_element(driver, (By.ID, "jqgridCorpMcAnalysis"), timeout=20)

                if not driver.execute_script("return document.readyState === 'complete';"):
                    raise WebDriverException("Page not fully loaded; session may be unstable")

                rows = grid.find_elements(By.CSS_SELECTOR, "tr.jqgrow")
                logger.info(f"Found {len(rows)} rows on current page")

                for i, row in enumerate(rows):
                    row_data = extract_row(row)
                    if row_data:
                        all_data.append(row_data)
                    else:
                        logger.warning(f"Skipping row {i + 1} after max retries")
                    time.sleep(0.15)

                if not driver.find_elements(By.ID, "jqgridCorpMcAnalysis"):
                    raise WebDriverException("Session appears to be lost; grid not found")

                driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
                time.sleep(1)
                # Result is not used; the call only waits until the forward
                # icon is clickable before the pager button is inspected.
                wait_for_element(driver, (By.CSS_SELECTOR, "div.btn.btn-sm.btn-default span.fa.fa-forward"), timeout=10)
                parent_div = wait_for_element(driver, (By.ID, "next_pjqgridCorpMcAnalysis"), timeout=10)
                if "disabled" in parent_div.get_attribute("class"):
                    logger.info("No more pages to scrape")
                    return all_data

                pre_click_class = parent_div.get_attribute("class")
                logger.info(f"Pre-click button class: {pre_click_class}")
                try:
                    driver.execute_script("arguments[0].click();", parent_div)
                except Exception as e:
                    logger.warning(f"JavaScript click failed: {str(e)}, trying native click")
                    parent_div.click()

                first_row = rows[0] if rows else None

                def page_changed(drv):
                    # Two consecutive full pages have the same row count, so
                    # the count alone cannot reveal the change; the previous
                    # first row going stale can (the sibling extractors wait
                    # on the same signal).
                    if first_row is not None and EC.staleness_of(first_row)(drv):
                        return True
                    return (len(drv.find_elements(By.CSS_SELECTOR, "tr.jqgrow")) != len(rows)
                            or "disabled" in parent_div.get_attribute("class"))

                WebDriverWait(driver, 20).until(page_changed, "Page did not update after clicking next")
                post_click_class = parent_div.get_attribute("class")
                logger.info(f"Post-click button class: {post_click_class}")

                time.sleep(2)
                break

            except TimeoutException as e:
                logger.error(f"Attempt {attempt + 1} failed: {str(e)}\nTrace: {traceback.format_exc()}")
                if attempt + 1 < max_retries:
                    logger.warning("Pagination failed; refreshing page and retrying")
                    driver.refresh()
                    time.sleep(5)
                    continue
                logger.error("Max retries reached. Aborting patient analysis extraction.")
                return all_data
            except WebDriverException as e:
                logger.error(f"Attempt {attempt + 1} failed: {str(e)}\nTrace: {traceback.format_exc()}")
                if attempt + 1 < max_retries:
                    time.sleep(retry_delay)
                    continue
                return all_data
            except Exception as e:
                logger.error(f"Unexpected error during patient analysis pagination: {str(e)}\nTrace: {traceback.format_exc()}")
                return all_data

def extract_grid_data_mc(driver):
    """Read every page of the ``jqgrid`` (MC by provider) grid into a list of dicts.

    The page count is read from the ``sp_1_jqgrid`` element; if that element
    times out a single page is assumed, and any other error while reading it
    yields an empty list. A failure on any page stops the paging loop and
    whatever has been collected so far is returned.

    Args:
        driver: WebDriver instance already showing the MC-by-provider results.

    Returns:
        A list of dicts with keys ``'Provider'``, ``'Total MC Given'`` and
        ``'No. of Visit'`` (stripped cell text).
    """
    records = []
    try:
        logger.info("Extracting MC data")
        pager_total = wait_for_element(driver, (By.ID, "sp_1_jqgrid"))
        total_pages = int(pager_total.text.strip())
        logger.info(f"Total pages: {total_pages}")
    except TimeoutException:
        logger.warning("No pagination element found, assuming single page")
        total_pages = 1
    except Exception as e:
        logger.error(f"Error getting total pages: {str(e)}")
        return records

    current_page = 1
    while current_page <= total_pages:
        try:
            logger.info(f"Processing page {current_page}")
            time.sleep(3)
            driver.execute_script("window.scrollTo(0, 0);")
            # Result is not used; the call only waits until the grid is clickable.
            wait_for_element(driver, (By.ID, "jqgrid"))
            row_wait = WebDriverWait(driver, 10)
            grid_rows = row_wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, "tr.jqgrow")))
            for grid_row in grid_rows:
                try:
                    provider_cell = grid_row.find_element(By.CSS_SELECTOR, "td[aria-describedby='jqgrid_STR_ProvName']")
                    provider_text = provider_cell.text.strip()
                    mc_cell = grid_row.find_element(By.CSS_SELECTOR, "td[aria-describedby='jqgrid_STR_MC_Given_Count']")
                    mc_text = mc_cell.text.strip()
                    visit_cell = grid_row.find_element(By.CSS_SELECTOR, "td[aria-describedby='jqgrid_STR_VISITCount']")
                    visit_text = visit_cell.text.strip()
                    record = {
                        'Provider': provider_text,
                        'Total MC Given': mc_text,
                        'No. of Visit': visit_text,
                    }
                    records.append(record)
                except Exception as e:
                    logger.warning(f"Error extracting MC row on page {current_page}: {str(e)}")
                    continue
            if current_page < total_pages:
                forward_icon = wait_for_element(driver, (By.CSS_SELECTOR, "div.btn.btn-sm.btn-default span.fa.fa-forward"), timeout=10)
                driver.execute_script("arguments[0].scrollIntoView(true);", forward_icon)
                forward_icon.click()
                # The old first row going stale signals that the grid re-rendered.
                WebDriverWait(driver, 15).until(EC.staleness_of(grid_rows[0]))
        except Exception as e:
            logger.error(f"Error on page {current_page}: {str(e)}")
            break
        current_page += 1
    return records

def _rows_to_numeric_frame(rows, columns, numeric_columns):
    """Build a DataFrame with a fixed column set and coerce the numeric columns."""
    # Passing ``columns`` keeps the expected columns even when ``rows`` is
    # empty, so later column lookups do not raise KeyError.
    frame = pd.DataFrame(rows, columns=columns)
    for col in numeric_columns:
        frame[col] = pd.to_numeric(frame[col], errors='coerce')
    return frame


def scrape_data(url, user_id, password):
    """Log in to the portal and scrape patient, MC and claim reports.

    Patient analysis is scraped month by month, MC-by-provider year by year,
    and the claim summary for two fixed date ranges stored under the keys
    2024 and 2025.

    Args:
        url: Address of the portal's landing page.
        user_id: Login id typed into the ``txtloginid`` field.
        password: Password typed into the ``inputpss`` field.

    Returns:
        A 4-tuple ``(patient_data_by_year_month, claim_data_by_year,
        mc_data_by_year, message)``. The first is ``{year: {"YYYY-MM":
        DataFrame}}``, the next two are ``{year: DataFrame}``. On any error
        the three dicts are ``None`` and ``message`` carries the error text
        and traceback. The WebDriver is always closed before returning.
    """
    edge_options = Options()
    edge_options.add_argument("--disable-blink-features=AutomationControlled")
    edge_options.add_argument("--no-sandbox")
    edge_options.add_argument("--disable-dev-shm-usage")
    driver = webdriver.Edge(service=Service(EdgeChromiumDriverManager().install()), options=edge_options)
    driver.set_page_load_timeout(60)
    start_year = 2024
    end_year = 2025
    current_date = datetime.now()
    patient_data_by_year_month, claim_data_by_year, mc_data_by_year = {}, {}, {}

    # Column layouts, in the order the grid extractors emit their dict keys.
    patient_cols = ['Employee Name', 'Employee No', 'Division/Department', 'Total Visit',
                    'Total MC (Days)', 'Total Claim (Own)', 'Total Claim (Dep)']
    numeric_cols_patient = ['Total Visit', 'Total MC (Days)', 'Total Claim (Own)', 'Total Claim (Dep)']
    mc_cols = ['Provider', 'Total MC Given', 'No. of Visit']
    numeric_cols_mc = ['Total MC Given', 'No. of Visit']
    claim_cols = ['Provider Name', 'No of Visits', 'Total Claim', 'Total MC (Days)']
    numeric_cols_claim = ['No of Visits', 'Total Claim', 'Total MC (Days)']

    def login():
        """Open the portal in the current driver and sign in."""
        logger.info(f"Connecting to {url}")
        driver.get(url)

        logger.info("Clicking login image")
        image = wait_for_element(driver, (By.XPATH, "//img[@src='/ClaimEXMVR/Servlet_LoadImage?SFC=loadImage&imageName=icorporate.png']"))
        image.click()

        logger.info("Entering credentials")
        user_id_field = wait_for_element(driver, (By.NAME, "txtloginid"))
        user_id_field.send_keys(user_id)
        password_field = wait_for_element(driver, (By.ID, "inputpss"))
        password_field.send_keys(password)
        sign_button = wait_for_element(driver, (By.CSS_SELECTOR, "button.btn.btn-primary[type='submit']"))
        sign_button.click()

        logger.info("Clicking Continue button")
        continue_button = wait_for_element(driver, (By.XPATH, "//button[text()='Continue']"))
        continue_button.click()

    def restart_driver():
        """Replace the current driver with a fresh, logged-in session."""
        nonlocal driver
        try:
            driver.quit()
        except Exception as e:
            # Best effort: the old session may already be dead.
            logger.warning(f"Ignoring error while closing old WebDriver: {str(e)}")
        driver = webdriver.Edge(service=Service(EdgeChromiumDriverManager().install()), options=edge_options)
        driver.set_page_load_timeout(60)
        logger.info("Restarting WebDriver session")
        login()

    def keep_alive():
        """Probe the session with a trivial script; restart it if the probe fails."""
        try:
            driver.execute_script("return document.title;")
            logger.info("Session keep-alive check successful")
        except Exception as e:
            logger.warning(f"Session keep-alive failed: {str(e)}")
            restart_driver()

    def show_100_rows(settle_seconds):
        """Switch the grid pager to 100 rows per page, then pause."""
        dropdown = wait_for_element(driver, (By.CSS_SELECTOR, "select.ui-pg-selbox"))
        driver.execute_script("arguments[0].scrollIntoView(true);", dropdown)
        Select(dropdown).select_by_value("100")
        time.sleep(settle_seconds)

    def scrape_claim_summary(label, start_date, end_date):
        """Scrape one claim-summary date range and return it as a DataFrame."""
        logger.info(f"Scraping Claim Summary for {label} ({start_date} to {end_date})")
        keep_alive()
        reg_claims_link = wait_for_element(driver, (By.XPATH, "//a[.//span[contains(text(), 'Registration') and contains(text(), 'Claims')]]"))
        reg_claims_link.click()
        providers_link = wait_for_element(driver, (By.XPATH, "//a[@href='#/Claim_Summary_by_Provider_Analysis'][span[text()=' Claim Summary by Providers ']]"))
        providers_link.click()
        time.sleep(5)
        # NOTE(review): this report only gets month/day selection, so the year
        # in start_date/end_date is never applied - confirm that is intended.
        select_date_month_day(driver, start_date, "txtFromDate")
        select_date_month_day(driver, end_date, "txtToDate")
        search_button = wait_for_element(driver, (By.ID, "btnSearch"))
        driver.execute_script("arguments[0].click();", search_button)
        time.sleep(10)
        show_100_rows(5)
        claim_df = _rows_to_numeric_frame(extract_grid_data_clm_summary(driver), claim_cols, numeric_cols_claim)
        claim_df['Avg Claim per Visit'] = claim_df['Total Claim'] / claim_df['No of Visits']
        logger.info(f"Completed Claim Summary for {label}")
        return claim_df

    try:
        login()

        # Patient Analysis (Month by Month)
        for year in range(start_year, end_year + 1):
            patient_data_by_year_month[year] = {}
            # Past years are complete; only the current year stops at this month.
            end_month = 12 if year < current_date.year else current_date.month
            for month in range(1, end_month + 1):
                start_date = f"{year}-{month:02d}-01"
                if month < 12:
                    month_end = datetime(year, month + 1, 1) - timedelta(days=1)
                else:
                    month_end = datetime(year, 12, 31)
                # Never ask for dates beyond today.
                end_date = min(month_end, current_date).strftime('%Y-%m-%d')

                logger.info(f"Scraping Patient Analysis for {year}-{month:02d} ({start_date} to {end_date})")
                keep_alive()
                productivity_link = wait_for_element(driver, (By.XPATH, "//a[span[text()='Productivity Reports']]"))
                productivity_link.click()
                patient_analysis_link = wait_for_element(driver, (By.XPATH, "//a[@href='#/Patient_Analysis_Report'][span[text()=' Patient Analysis Report ']]"))
                patient_analysis_link.click()
                select_date(driver, start_date, "txtStartDate")
                select_date(driver, end_date, "txtEndDate")
                search_button = wait_for_element(driver, (By.ID, "btnSearch"))
                search_button.click()
                time.sleep(5)
                show_100_rows(10)
                patient_df = _rows_to_numeric_frame(
                    extract_grid_data_patient_analysis(driver), patient_cols, numeric_cols_patient)
                patient_df['Total Claim (Combined)'] = patient_df['Total Claim (Own)'] + patient_df['Total Claim (Dep)']
                patient_df['Avg Claim per Visit'] = patient_df['Total Claim (Combined)'] / patient_df['Total Visit']
                patient_df['Avg MC per Visit'] = patient_df['Total MC (Days)'] / patient_df['Total Visit']
                patient_df['Avg Claim per MC'] = patient_df['Total Claim (Combined)'] / patient_df['Total MC (Days)']
                patient_data_by_year_month[year][f"{year}-{month:02d}"] = patient_df
                logger.info(f"Completed Patient Analysis for {year}-{month:02d}")

        # MC Data (Yearly)
        for year in range(start_year, end_year + 1):
            start_date = f"{year}-01-01"
            # Clamp to the year's own end so a later run does not pull
            # following-year data into this year's bucket.
            end_date = min(datetime(year, 12, 31), current_date).strftime('%Y-%m-%d')
            logger.info(f"Scraping MC by Provider for {year} ({start_date} to {end_date})")
            keep_alive()
            productivity_link = wait_for_element(driver, (By.XPATH, "//a[span[text()='Productivity Reports']]"))
            productivity_link.click()
            mc_link = wait_for_element(driver, (By.XPATH, "//a[@href='#/MC_HealthCare_By_Provider'][span[text()=' MC by Provider ']]"))
            mc_link.click()
            time.sleep(2)
            select_date(driver, start_date, "txtStartDate")
            select_date(driver, end_date, "txtEndDate")
            search_button = wait_for_element(driver, (By.ID, "btnSearch"))
            search_button.click()
            time.sleep(10)
            show_100_rows(10)
            mc_df = _rows_to_numeric_frame(extract_grid_data_mc(driver), mc_cols, numeric_cols_mc)
            mc_df['% MC Given'] = (mc_df['Total MC Given'] / mc_df['No. of Visit']) * 100
            mc_data_by_year[year] = mc_df
            logger.info(f"Completed MC by Provider for {year}")

        # Claim Data (Yearly)
        claim_data_by_year[2024] = scrape_claim_summary(2024, "2024-01-01", "2025-01-01")
        claim_data_by_year[2025] = scrape_claim_summary(2025, "2024-12-31", current_date.strftime('%Y-%m-%d'))

        return patient_data_by_year_month, claim_data_by_year, mc_data_by_year, "Data scraped successfully!"
    except TimeoutException as e:
        error_msg = f"Timeout Error: {str(e)}\nTrace: {traceback.format_exc()}"
        logger.error(error_msg)
        return None, None, None, error_msg
    except WebDriverException as e:
        error_msg = f"WebDriver Error: {str(e)}\nTrace: {traceback.format_exc()}"
        logger.error(error_msg)
        return None, None, None, error_msg
    except Exception as e:
        error_msg = f"General Error: {str(e)}\nTrace: {traceback.format_exc()}"
        logger.error(error_msg)
        return None, None, None, error_msg
    finally:
        try:
            driver.quit()
            logger.info("WebDriver closed")
        except Exception as e:
            logger.error(f"Error closing WebDriver: {str(e)}")

# --- Plotting Functions ---
def generate_dashboard_charts(patient_data_by_year_month, claim_data_by_year, mc_data_by_year, year, show_mc_pct=True, mc_sort_order="desc"):
    try:
        logger.info(f"Generating yearly dashboard charts for {year}")
        if not patient_data_by_year_month or not claim_data_by_year or not mc_data_by_year:
            logger.error("No data available to generate charts")
            return None, None, None, None
        
        year_int = int(year)
        # Aggregate patient data for the entire year
        patient_df_year = pd.concat(patient_data_by_year_month.get(year_int, {}).values(), ignore_index=True) if year_int in patient_data_by_year_month else pd.DataFrame()
        claim_df = claim_data_by_year.get(year_int, pd.DataFrame())
        mc_df = mc_data_by_year.get(year_int, pd.DataFrame())
        
        if patient_df_year.empty or claim_df.empty or mc_df.empty:
            logger.warning(f"Data for year {year} is empty: Patient: {patient_df_year.empty}, Claim: {claim_df.empty}, MC: {mc_df.empty}")
            return None, None, None, None
        
        # Ensure no duplicate employee entries by aggregating data
        if not patient_df_year.empty:
            patient_df_year = patient_df_year.groupby(['Employee Name', 'Employee No', 'Division/Department']).agg({
                'Total Visit': 'sum',
                'Total MC (Days)': 'sum',
                'Total Claim (Own)': 'sum',
                'Total Claim (Dep)': 'sum',
                'Total Claim (Combined)': 'sum',
                'Avg Claim per Visit': 'mean',
                'Avg MC per Visit': 'mean',
                'Avg Claim per MC': 'mean'
            }).reset_index()

        sns.set(style="whitegrid", palette="muted")
        plt.rcParams.update({
            'font.family': 'Arial', 'font.size': 12, 'axes.titlesize': 16, 
            'axes.labelsize': 14, 'xtick.labelsize': 11, 'ytick.labelsize': 11,
            'axes.titleweight': 'bold', 'axes.linewidth': 1.5, 'grid.linestyle': ':', 
            'grid.alpha': 0.5, 'figure.facecolor': '#f5f6f5', 'axes.facecolor': '#ffffff',
            'axes.edgecolor': '#333333', 'axes.labelcolor': '#333333', 'text.color': '#333333'
        })
        
        provider_charts = []
        employee_charts = []

        # Provider Charts
        fig = plt.figure(figsize=(12, 6))
        top_prov_visits = mc_df.sort_values('No. of Visit', ascending=False).head(10)
        ax = sns.barplot(data=top_prov_visits, x='No. of Visit', y='Provider', hue='Provider', palette='Blues_r', legend=False, edgecolor='black', linewidth=0.5)
        ax.set_title(f'Top 10 Providers by Total Visits ({year})', pad=15)
        ax.set_xlabel('Total Visits')
        ax.set_ylabel('Provider')
        for i, v in enumerate(top_prov_visits['No. of Visit']):
            ax.text(v + 0.5, i, f'{int(v)}', va='center', fontsize=10, color='#333333')
        plt.tight_layout()
        provider_charts.append(fig)
        plt.close(fig)

        fig = plt.figure(figsize=(12, 6))
        top_prov_mc = mc_df.sort_values('Total MC Given', ascending=False).head(10)
        ax = sns.barplot(data=top_prov_mc, x='Total MC Given', y='Provider', hue='Provider', palette='Greens_r', legend=False, edgecolor='black', linewidth=0.5)
        ax.set_title(f'Top 10 Providers by Total MC Given ({year})', pad=15)
        ax.set_xlabel('Total MC (Days)')
        ax.set_ylabel('Provider')
        for i, v in enumerate(top_prov_mc['Total MC Given']):
            ax.text(v + 0.5, i, f'{int(v)}', va='center', fontsize=10, color='#333333')
        plt.tight_layout()
        provider_charts.append(fig)
        plt.close(fig)

        if show_mc_pct:
            fig = plt.figure(figsize=(18, 9))
            top_visits_provs = set(mc_df.sort_values('No. of Visit', ascending=False).head(10)['Provider'])
            top_mc_provs = set(mc_df.sort_values('Total MC Given', ascending=False).head(10)['Provider'])
            top_provs = top_visits_provs.union(top_mc_provs)
            top_prov_mc_pct = mc_df[mc_df['Provider'].isin(top_provs)].sort_values(
                '% MC Given', ascending=(mc_sort_order == "asc")).head(20)
            ax = sns.barplot(data=top_prov_mc_pct, x='Provider', y='% MC Given', hue='Provider', 
                            palette='Purples_r', legend=False, edgecolor='black', linewidth=0.5)
            ax.set_title(f'Top 20 Providers by % MC Given ({year}) - Sorted {"Ascending" if mc_sort_order == "asc" else "Descending"}', pad=15)
            ax.set_ylabel('% MC Given', fontsize=14)
            ax.set_xlabel('Provider', fontsize=14)
            plt.xticks(rotation=45, ha='right', fontsize=11)
            for i, v in enumerate(top_prov_mc_pct['% MC Given']):
                ax.text(i, v + 1, f'{v:.1f}%', ha='center', fontsize=10, color='#333333')
            plt.tight_layout()
            provider_charts.append(fig)
        else:
            provider_charts.append(None)
        if show_mc_pct:
            plt.close(fig)

        fig = plt.figure(figsize=(12, 6))
        top_prov_claim = claim_df.sort_values('Total Claim', ascending=False).head(10)
        ax = sns.barplot(data=top_prov_claim, x='Total Claim', y='Provider Name', hue='Provider Name', 
                        palette='Oranges_r', legend=False, edgecolor='black', linewidth=0.5)
        ax.set_title(f'Top 10 Providers by Total Claim ({year})', pad=15)
        ax.set_xlabel('Total Claim ($)')
        ax.set_ylabel('Provider')
        for i, v in enumerate(top_prov_claim['Total Claim']):
            ax.text(v + 0.5, i, f'{v:,.2f}', va='center', fontsize=10, color='#333333')
        plt.tight_layout()
        provider_charts.append(fig)
        plt.close(fig)

        fig = plt.figure(figsize=(12, 6))
        top_prov_avg_claim = claim_df.sort_values('Avg Claim per Visit', ascending=False).head(10)
        ax = sns.barplot(data=top_prov_avg_claim, x='Provider Name', y='Avg Claim per Visit', hue='Provider Name', 
                        palette='Reds_r', legend=False, edgecolor='black', linewidth=0.5)
        ax.set_title(f'Top 10 Providers by Avg Claim per Visit ({year})', pad=15)
        ax.set_ylabel('Avg Claim per Visit ($)')
        ax.set_xlabel('Provider')
        plt.xticks(rotation=45, ha='right')
        for i, v in enumerate(top_prov_avg_claim['Avg Claim per Visit']):
            ax.text(i, v + 0.5, f'{v:.2f}', ha='center', fontsize=10, color='#333333')
        plt.tight_layout()
        provider_charts.append(fig)
        plt.close(fig)

        # Employee Charts (Fixed for Yearly Aggregation)
        fig = plt.figure(figsize=(12, 6))
        top_emp_visits = patient_df_year.sort_values('Total Visit', ascending=False).head(10)
        ax = sns.barplot(data=top_emp_visits, x='Total Visit', y='Employee Name', hue='Employee Name', 
                        palette='Blues_r', legend=False, edgecolor='black', linewidth=0.5)
        ax.set_title(f'Top 10 Employees by Total Visits ({year})', pad=15)
        ax.set_xlabel('Total Visits')
        ax.set_ylabel('Employee')
        for i, v in enumerate(top_emp_visits['Total Visit']):
            ax.text(v + 0.2, i, f'{int(v)}', va='center', fontsize=10, color='#333333')
        plt.tight_layout()
        employee_charts.append(fig)
        plt.close(fig)

        fig = plt.figure(figsize=(12, 6))
        top_emp_claim = patient_df_year.sort_values('Total Claim (Combined)', ascending=False).head(10)
        ax = sns.barplot(data=top_emp_claim, x='Total Claim (Combined)', y='Employee Name', hue='Employee Name', 
                        palette='Oranges_r', legend=False, edgecolor='black', linewidth=0.5)
        ax.set_title(f'Top 10 Employees by Total Claim ({year})', pad=15)
        ax.set_xlabel('Total Claim ($)')
        ax.set_ylabel('Employee')
        for i, v in enumerate(top_emp_claim['Total Claim (Combined)']):
            ax.text(v + 1, i, f'{v:,.2f}', va='center', fontsize=10, color='#333333')
        plt.tight_layout()
        employee_charts.append(fig)
        plt.close(fig)

        fig = plt.figure(figsize=(12, 6))
        top_emp_avg_claim = patient_df_year.sort_values('Avg Claim per Visit', ascending=False).head(10)
        ax = sns.barplot(data=top_emp_avg_claim, x='Employee Name', y='Avg Claim per Visit', hue='Employee Name', 
                        palette='Reds_r', legend=False, edgecolor='black', linewidth=0.5)
        ax.set_title(f'Top 10 Employees by Avg Claim per Visit ({year})', pad=15)
        ax.set_ylabel('Avg Claim per Visit ($)')
        ax.set_xlabel('Employee')
        plt.xticks(rotation=45, ha='right')
        for i, v in enumerate(top_emp_avg_claim['Avg Claim per Visit']):
            ax.text(i, v + 0.5, f'{v:.2f}', ha='center', fontsize=10, color='#333333')
        plt.tight_layout()
        employee_charts.append(fig)
        plt.close(fig)

        fig = plt.figure(figsize=(12, 6))
        top_emp_mc = patient_df_year.sort_values('Total MC (Days)', ascending=False).head(10)
        ax = sns.barplot(data=top_emp_mc, x='Total MC (Days)', y='Employee Name', hue='Employee Name', 
                        palette='Greens_r', legend=False, edgecolor='black', linewidth=0.5)
        ax.set_title(f'Top 10 Employees by Total MC ({year})', pad=15)
        ax.set_xlabel('Total MC (Days)')
        ax.set_ylabel('Employee')
        for i, v in enumerate(top_emp_mc['Total MC (Days)']):
            ax.text(v + 0.2, i, f'{int(v)}', va='center', fontsize=10, color='#333333')
        plt.tight_layout()
        employee_charts.append(fig)
        plt.close(fig)

        fig = plt.figure(figsize=(12, 6))
        top_emp_avg_mc = patient_df_year.sort_values('Avg MC per Visit', ascending=False).head(10)
        ax = sns.barplot(data=top_emp_avg_mc, x='Employee Name', y='Avg MC per Visit', hue='Employee Name', 
                        palette='Purples_r', legend=False, edgecolor='black', linewidth=0.5)
        ax.set_title(f'Top 10 Employees by Avg MC per Visit ({year})', pad=15)
        ax.set_ylabel('Avg MC per Visit (Days)')
        ax.set_xlabel('Employee')
        plt.xticks(rotation=45, ha='right')
        for i, v in enumerate(top_emp_avg_mc['Avg MC per Visit']):
            ax.text(i, v + 0.05, f'{v:.2f}', ha='center', fontsize=10, color='#333333')
        plt.tight_layout()
        employee_charts.append(fig)
        plt.close(fig)

        fig = plt.figure(figsize=(10, 6))
        division_claims = patient_df_year.groupby('Division/Department')['Total Claim (Combined)'].sum()
        plt.pie(division_claims, labels=division_claims.index, autopct='%1.1f%%', colors=sns.color_palette('muted'), 
                startangle=90, textprops={'fontsize': 11, 'color': '#333333'}, wedgeprops={'edgecolor': 'black', 'linewidth': 0.5})
        plt.title(f'Claim Distribution by Division ({year})', pad=15)
        plt.tight_layout()
        employee_charts.append(fig)
        plt.close(fig)

        provider_images = [fig_to_image(fig) if fig is not None else None for fig in provider_charts]
        employee_images = [fig_to_image(fig) for fig in employee_charts]
        
        logger.info(f"Generated {len(provider_charts)} provider charts and {len(employee_charts)} employee charts for year {year}")
        return provider_images, employee_images, provider_charts, employee_charts
    except Exception as e:
        logger.error(f"Error generating charts: {str(e)}\nTrace: {traceback.format_exc()}")
        return None, None, None, None

def _render_top_employee_bars(df, metric, *, horizontal, palette, title, metric_label,
                              offset, value_fmt, figsize=(12, 7)):
    """Draw a "top 10 employees by <metric>" bar chart and return the (closed) figure.

    Args:
        df: DataFrame holding at least ``metric`` and 'Employee Name' columns.
        metric: Column to rank by (descending) and to plot.
        horizontal: True puts the metric on the x axis (bars grow sideways);
            False puts employees on the x axis with 45-degree tick labels.
        palette: seaborn palette name for the bars.
        title: Full chart title.
        metric_label: Axis label for the metric axis.
        offset: Gap, in data units, between the bar end and its value label.
        value_fmt: Callable turning one metric value into its label text.
        figsize: Figure size in inches.

    Returns:
        The matplotlib figure. It has already been removed from pyplot's
        registry; callers can still render or save it.
    """
    fig = plt.figure(figsize=figsize)
    try:
        top = df.sort_values(metric, ascending=False).head(10)
        if horizontal:
            ax = sns.barplot(data=top, x=metric, y='Employee Name', hue='Employee Name',
                             palette=palette, legend=False, edgecolor='black', linewidth=0.5)
            ax.set_xlabel(metric_label)
            ax.set_ylabel('Employee')
        else:
            ax = sns.barplot(data=top, x='Employee Name', y=metric, hue='Employee Name',
                             palette=palette, legend=False, edgecolor='black', linewidth=0.5)
            ax.set_ylabel(metric_label)
            ax.set_xlabel('Employee')
            plt.xticks(rotation=45, ha='right')
        ax.set_title(title, pad=15)
        for position, value in enumerate(top[metric]):
            if horizontal:
                ax.text(value + offset, position, value_fmt(value),
                        va='center', fontsize=10, color='#333333')
            else:
                ax.text(position, value + offset, value_fmt(value),
                        ha='center', fontsize=10, color='#333333')
        plt.tight_layout()
        return fig
    finally:
        # Always release the figure from pyplot, even if plotting raised;
        # otherwise every failed callback leaves a figure registered.
        plt.close(fig)


def _render_division_claim_pie(df, title, figsize=(10, 7)):
    """Draw the claim-share-by-division pie chart and return the (closed) figure."""
    fig = plt.figure(figsize=figsize)
    try:
        division_claims = df.groupby('Division/Department')['Total Claim (Combined)'].sum()
        plt.pie(division_claims, labels=division_claims.index, autopct='%1.1f%%',
                colors=sns.color_palette('muted'), startangle=90,
                textprops={'fontsize': 11, 'color': '#333333'},
                wedgeprops={'edgecolor': 'black', 'linewidth': 0.5})
        plt.title(title, pad=15)
        plt.tight_layout()
        return fig
    finally:
        plt.close(fig)


def generate_monthly_patient_charts(patient_data_by_year_month, year, month):
    """Build the six employee charts for a single month.

    Args:
        patient_data_by_year_month: Mapping ``{year_int: {"YYYY-MM": DataFrame}}``.
        year: Year to look up; converted with ``int()``.
        month: Month key in "YYYY-MM" form.

    Returns:
        ``(employee_images, employee_charts)``: the rendered images (from
        ``fig_to_image``) and the matching figures, in the order total visits,
        total claim, avg claim per visit, total MC, avg MC per visit, claim
        distribution by division. ``(None, None)`` when the month has no data
        or any chart fails.
    """
    try:
        logger.info(f"Generating monthly patient charts for {year}-{month}")
        year_data = patient_data_by_year_month.get(int(year)) if patient_data_by_year_month else None
        if not year_data or month not in year_data:
            logger.error(f"No patient data available for {year}-{month}")
            return None, None

        patient_df_month = year_data[month]
        if patient_df_month.empty:
            logger.warning(f"Patient data for {year}-{month} is empty")
            return None, None

        month_name = datetime.strptime(month.split('-')[1], '%m').strftime('%B')
        sns.set(style="whitegrid", palette="muted")
        plt.rcParams.update({
            'font.family': 'Arial', 'font.size': 12, 'axes.titlesize': 16,
            'axes.labelsize': 14, 'xtick.labelsize': 11, 'ytick.labelsize': 11,
            'axes.titleweight': 'bold', 'axes.linewidth': 1.5, 'grid.linestyle': ':',
            'grid.alpha': 0.5, 'figure.facecolor': '#f5f6f5', 'axes.facecolor': '#ffffff',
            'axes.edgecolor': '#333333', 'axes.labelcolor': '#333333', 'text.color': '#333333'
        })

        period = f'{month_name} {year}'
        # One entry per bar chart, in dashboard order.
        bar_specs = [
            dict(metric='Total Visit', horizontal=True, palette='Blues_r',
                 title=f'Top 10 Employees by Total Visits ({period})',
                 metric_label='Total Visits', offset=0.2,
                 value_fmt=lambda v: f'{int(v)}'),
            dict(metric='Total Claim (Combined)', horizontal=True, palette='Oranges_r',
                 title=f'Top 10 Employees by Total Claim ({period})',
                 metric_label='Total Claim ($)', offset=1,
                 value_fmt=lambda v: f'{v:,.2f}'),
            dict(metric='Avg Claim per Visit', horizontal=False, palette='Reds_r',
                 title=f'Top 10 Employees by Avg Claim per Visit ({period})',
                 metric_label='Avg Claim per Visit ($)', offset=0.5,
                 value_fmt=lambda v: f'{v:.2f}'),
            dict(metric='Total MC (Days)', horizontal=True, palette='Greens_r',
                 title=f'Top 10 Employees by Total MC ({period})',
                 metric_label='Total MC (Days)', offset=0.2,
                 value_fmt=lambda v: f'{int(v)}'),
            dict(metric='Avg MC per Visit', horizontal=False, palette='Purples_r',
                 title=f'Top 10 Employees by Avg MC per Visit ({period})',
                 metric_label='Avg MC per Visit (Days)', offset=0.05,
                 value_fmt=lambda v: f'{v:.2f}'),
        ]

        employee_charts = [_render_top_employee_bars(patient_df_month, **spec) for spec in bar_specs]
        employee_charts.append(
            _render_division_claim_pie(patient_df_month, f'Claim Distribution by Division ({period})'))

        employee_images = [fig_to_image(fig) for fig in employee_charts]

        logger.info(f"Generated {len(employee_charts)} monthly patient charts for {month_name} {year}")
        return employee_images, employee_charts
    except Exception as e:
        logger.error(f"Error generating monthly patient charts: {str(e)}\nTrace: {traceback.format_exc()}")
        return None, None

def fig_to_image(fig):
    """Render a figure into an RGBA pixel array.

    Args:
        fig: Object exposing a ``canvas`` with ``draw()``, ``buffer_rgba()``
            and ``get_width_height()``; ``None`` is passed straight through.

    Returns:
        A ``uint8`` array shaped (height, width, 4), or ``None`` when ``fig``
        is ``None`` or the conversion fails (the failure is logged).
    """
    if fig is None:
        return None
    try:
        canvas = fig.canvas
        canvas.draw()
        flat_pixels = np.frombuffer(canvas.buffer_rgba(), dtype=np.uint8)
        # The canvas reports (width, height); image arrays are row-major.
        width, height = canvas.get_width_height()
        return flat_pixels.reshape((height, width, 4))
    except Exception as e:
        logger.error(f"Error converting figure to image: {str(e)}")
        return None

def _add_chart_slide(prs, slide_layout, section, figures, titles, title):
    """Add one "<section> Insights" slide and tile the non-None figures on it.

    Args:
        prs: The python-pptx presentation being built.
        slide_layout: Layout used for the new slide.
        section: "Provider" or "Employee"; used in the slide title and logs.
        figures: Figures in dashboard order; entries may be ``None``.
        titles: Chart titles, positionally aligned with ``figures``.
        title: Period label shown in parentheses (e.g. a year).
    """
    slide = prs.slides.add_slide(slide_layout)
    slide.shapes.title.text = f"{section} Insights ({title})"

    # Pair each figure with its title BEFORE dropping the None entries, so a
    # hidden chart cannot shift the titles of the charts that follow it.
    labelled = [
        (titles[pos] if pos < len(titles) else f"{section} chart {pos + 1} ({title})", fig)
        for pos, fig in enumerate(figures)
        if fig is not None
    ]
    if not labelled:
        logger.warning(f"No valid {section.lower()} charts to export")
        return

    charts_per_row = 3
    rows = (len(labelled) + charts_per_row - 1) // charts_per_row
    chart_width = Inches(3.8)
    # A single row gets taller pictures; several rows must share the slide.
    chart_height = Inches(2.5) if rows > 1 else Inches(5)
    top_start = Inches(1.5)

    for idx, (chart_title, fig) in enumerate(labelled):
        row, col = divmod(idx, charts_per_row)
        left = Inches(0.5 + col * 4.5)
        top = top_start + row * (chart_height + Inches(0.2))
        img_stream = BytesIO()
        fig.savefig(img_stream, format='png', bbox_inches='tight', dpi=100)
        img_stream.seek(0)
        if img_stream.getvalue():
            slide.shapes.add_picture(img_stream, left, top, width=chart_width, height=chart_height)
            logger.info(f"Added chart {idx+1}: '{chart_title}' to {section} slide")
        else:
            logger.warning(f"Failed to save chart {idx+1}: '{chart_title}' - empty image stream")


def charts_to_pptx(provider_charts, employee_charts, title):
    """Export chart figures to a temporary PPTX file.

    Args:
        provider_charts: Provider figures in dashboard order (entries may be
            ``None``); a falsy value skips the provider slide.
        employee_charts: Employee figures in dashboard order; a falsy value
            skips the employee slide.
        title: Period label used in slide and chart titles.

    Returns:
        Path of the saved ``.pptx`` file (not deleted automatically), or
        ``None`` if anything fails.
    """
    try:
        logger.info(f"Generating PPTX for {title}")
        prs = Presentation()
        slide_layout = prs.slide_layouts[5]
        prs.slide_width = Inches(13.33)
        prs.slide_height = Inches(7.5)

        provider_titles = [
            f"Top 10 Providers by Total Visits ({title})",
            f"Top 10 Providers by Total MC Given ({title})",
            f"Top 20 Providers by % MC Given ({title})",
            f"Top 10 Providers by Total Claim ({title})",
            f"Top 10 Providers by Avg Claim per Visit ({title})"
        ]

        employee_titles = [
            f"Top 10 Employees by Total Visits ({title})",
            f"Top 10 Employees by Total Claim ({title})",
            f"Top 10 Employees by Avg Claim per Visit ({title})",
            f"Top 10 Employees by Total MC ({title})",
            f"Top 10 Employees by Avg MC per Visit ({title})",
            f"Claim Distribution by Division ({title})"
        ]

        if provider_charts:
            _add_chart_slide(prs, slide_layout, "Provider", provider_charts, provider_titles, title)

        if employee_charts:
            _add_chart_slide(prs, slide_layout, "Employee", employee_charts, employee_titles, title)

        # Reserve a unique path, then save after the handle is closed so the
        # file is never opened twice at once.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.pptx') as tmp_file:
            tmp_file_path = tmp_file.name
        prs.save(tmp_file_path)

        logger.info(f"PPTX file saved to temporary path: {tmp_file_path}")
        return tmp_file_path
    except Exception as e:
        logger.error(f"Error generating PPTX: {str(e)}\nTrace: {traceback.format_exc()}")
        return None

# --- Gradio Interface ---
# Layout of the dashboard: credential inputs, period/chart controls, a status
# box, hidden state for the scraped data, and two tabs of chart images.
with gr.Blocks(title="Claims Analysis Dashboard", css="""
    body { background-color: #f5f6f5; }
    h1, h2 { color: #333333; font-family: Arial; }
""") as demo:
    gr.Markdown("# Claims Analysis Dashboard (2024 - Present)")
    
    # Target site and login details passed to the scraper on "Submit".
    with gr.Row():
        url_input = gr.Textbox(label="Website URL", placeholder="Enter URL here", lines=1)
        user_id_input = gr.Textbox(label="User ID", placeholder="Enter User ID", lines=1)
        password_input = gr.Textbox(label="Password", type="password", placeholder="Enter Password", lines=1)
    scrape_btn = gr.Button("Submit", variant="primary")
    
    # Period and chart controls. The year/month dropdowns start disabled
    # (interactive=False); scrape_and_store re-populates and enables them.
    with gr.Row():
        year_dropdown = gr.Dropdown(
            label="Select Year to View Data",
            choices=["2024"],
            value="2024",
            allow_custom_value=False,
            interactive=False
        )
        # "Yearly" plus every YYYY-MM key from 2024-01 through 2035-12.
        month_dropdown = gr.Dropdown(
            label="Select Month (Optional)",
            choices=["Yearly"] + [f"{year}-{month:02d}" for year in range(2024, 2036) for month in range(1, 13)],
            value="Yearly",
            allow_custom_value=False,
            interactive=False
        )
        show_mc_pct_checkbox = gr.Checkbox(label="Show % MC Given Chart", value=True)
        mc_sort_dropdown = gr.Dropdown(
            label="Sort % MC Given",
            choices=["desc", "asc"],
            value="desc",
            allow_custom_value=False
        )
    
    status_output = gr.Textbox(label="Status", lines=5, interactive=False)
    # State holders for the scraped patient, claim and MC data; written by
    # scrape_and_store and read back by the update/download callbacks.
    patient_state = gr.State()
    claim_state = gr.State()
    mc_state = gr.State()

    with gr.Tabs():
        # Five provider chart slots (prov_chart1..5).
        with gr.TabItem("Provider Insights"):
            gr.Markdown("## Provider Insights Dashboard (Yearly)")
            with gr.Row():
                prov_chart1 = gr.Image(label="Total Visits by Providers", interactive=False)
                prov_chart2 = gr.Image(label="Total MC by Providers", interactive=False)
            with gr.Row():
                prov_chart3 = gr.Image(label="% MC Given by Providers", interactive=False, visible=True)
            with gr.Row():
                prov_chart4 = gr.Image(label="Total Claim by Providers", interactive=False)
                prov_chart5 = gr.Image(label="Average Claim per Visit by Providers", interactive=False)
            download_btn_prov = gr.Button("Download Provider Charts as PPTX")

        # Six employee chart slots (emp_chart1..6).
        with gr.TabItem("Employee Insights"):
            gr.Markdown("## Employee Insights Dashboard")
            with gr.Row():
                emp_chart1 = gr.Image(label="Total Visits by Employees", interactive=False)
                emp_chart2 = gr.Image(label="Total Claim by Employees", interactive=False)
            with gr.Row():
                emp_chart3 = gr.Image(label="Average Claim per Visit by Employees", interactive=False)
                emp_chart4 = gr.Image(label="Total MC by Employees", interactive=False)
            with gr.Row():
                emp_chart5 = gr.Image(label="Average MC per Visit by Employees", interactive=False)
                emp_chart6 = gr.Image(label="Claim Distribution by Division", interactive=False)
            download_btn_emp = gr.Button("Download Employee Charts as PPTX")

    def scrape_and_store(url, user_id, password, show_mc_pct, mc_sort_order):
        """Scrape the site, store the data in state and draw the first year's charts.

        Returns a 17-item tuple matching the ``scrape_btn.click`` outputs:
        status text, the three data sets (patient, claim, MC), updates for the
        year and month dropdowns, five provider images and six employee images.
        On scrape failure the data and images are ``None`` and the dropdowns
        stay disabled. If only chart rendering fails, the scraped data is
        still returned (with blank charts) so the scrape is not thrown away.
        """
        try:
            logger.info("Starting scrape_and_store")
            patient_data_by_year_month, claim_data_by_year, mc_data_by_year, status = scrape_data(url, user_id, password)
            if patient_data_by_year_month is None or claim_data_by_year is None or mc_data_by_year is None:
                logger.warning(f"Scraping failed: {status}")
                return (
                    status, None, None, None,
                    gr.update(choices=["2024"], value="2024", interactive=False),
                    gr.update(choices=["Yearly"], value="Yearly", interactive=False),
                    *([None] * 11),
                )

            available_years = sorted(set(patient_data_by_year_month.keys()) | set(claim_data_by_year.keys()) | set(mc_data_by_year.keys()))
            year_choices = [str(year) for year in available_years]
            default_year = year_choices[0] if year_choices else "2024"
            month_choices = ["Yearly"] + [f"{year}-{month:02d}" for year in available_years for month in range(1, 13)]

            provider_images, employee_images, _, _ = generate_dashboard_charts(
                patient_data_by_year_month, claim_data_by_year, mc_data_by_year, default_year, show_mc_pct, mc_sort_order)
            if provider_images is None or employee_images is None:
                # The chart generator signals failure with None; keep the
                # scraped data usable instead of failing on indexing below.
                logger.warning(f"Chart generation failed for {default_year}; returning data without charts")
                status = f"{status}\nWarning: chart generation failed for {default_year}; see logs."
            else:
                logger.info("Scraping and chart generation completed successfully")

            # Pad/trim to the fixed number of image slots in the UI.
            provider_images = (list(provider_images or []) + [None] * 5)[:5]
            employee_images = (list(employee_images or []) + [None] * 6)[:6]
            return (
                status, patient_data_by_year_month, claim_data_by_year, mc_data_by_year,
                gr.update(choices=year_choices, value=default_year, interactive=True),
                gr.update(choices=month_choices, value="Yearly", interactive=True),
                *provider_images,
                *employee_images,
            )
        except Exception as e:
            error_msg = f"Error in scrape_and_store: {str(e)}\nTrace: {traceback.format_exc()}"
            logger.error(error_msg)
            return (error_msg, None, None, None, gr.update(), gr.update(), *([None] * 11))

    def update_dashboard(year, month, patient_data_by_year_month, claim_data_by_year, mc_data_by_year, show_mc_pct, mc_sort_order):
        """Re-render the eleven chart images for the selected period.

        Provider charts are always generated for the year. Employee charts are
        for the year when ``month`` is "Yearly", otherwise for that month.

        Returns:
            Eleven values (five provider images, then six employee images).
            A missing or failed chart is ``None``; all are ``None`` when no
            data has been scraped or an unexpected error occurs.
        """
        try:
            logger.info(f"Updating dashboard for {year}, {month}")
            if not patient_data_by_year_month or not claim_data_by_year or not mc_data_by_year:
                logger.warning("No data available for dashboard update")
                return [None] * 11

            provider_images, yearly_employee_images, _, _ = generate_dashboard_charts(
                patient_data_by_year_month, claim_data_by_year, mc_data_by_year, year, show_mc_pct, mc_sort_order)
            if month == "Yearly":
                employee_images = yearly_employee_images
            else:
                employee_images, _ = generate_monthly_patient_charts(patient_data_by_year_month, year, month)

            # Either generator may return None (failure) or a short list.
            # Pad both, so a failure on one side does not blank the other.
            if provider_images is None or len(provider_images) < 5:
                logger.warning("Provider images incomplete, returning None for missing charts")
            if employee_images is None or len(employee_images) < 6:
                logger.warning("Employee images incomplete, returning None for missing charts")
            provider_images = (list(provider_images or []) + [None] * 5)[:5]
            employee_images = (list(employee_images or []) + [None] * 6)[:6]

            return (*provider_images, *employee_images)
        except Exception as e:
            logger.error(f"Error updating dashboard: {str(e)}\nTrace: {traceback.format_exc()}")
            return [None] * 11

    def download_provider_pptx(year, month, patient_data_by_year_month, claim_data_by_year, mc_data_by_year, show_mc_pct, mc_sort_order):
        """Build a PPTX of the provider charts and hand it to Gradio.

        The charts are generated for ``year``; ``month`` only affects the deck
        title and the download label.

        Returns:
            A ``gr.File`` pointing at the generated deck, or ``None`` when
            there are no charts to export or generation fails.
        """
        try:
            logger.info(f"Downloading provider PPTX for {year}, {month}")
            _, _, provider_figs, _ = generate_dashboard_charts(
                patient_data_by_year_month, claim_data_by_year, mc_data_by_year, year, show_mc_pct, mc_sort_order)
            # Without this guard, charts_to_pptx would still write a deck with
            # no pictures, which was then offered for download.
            if not provider_figs or all(fig is None for fig in provider_figs):
                logger.warning("No provider charts available to export")
                return None

            if month == "Yearly":
                title = year
                label = f"Provider_Charts_{year}.pptx"
            else:
                month_name = datetime.strptime(month.split('-')[1], '%m').strftime('%B')
                title = f"{month_name} {year}"
                label = f"Provider_Charts_{month_name}_{year}.pptx"

            pptx_path = charts_to_pptx(provider_figs, [], title)
            if not pptx_path:
                logger.warning("Failed to generate provider PPTX")
                return None
            return gr.File(value=pptx_path, label=label)
        except Exception as e:
            logger.error(f"Error downloading provider PPTX: {str(e)}\nTrace: {traceback.format_exc()}")
            return None

    def download_employee_pptx(year, month, patient_data_by_year_month, claim_data_by_year, mc_data_by_year, show_mc_pct, mc_sort_order):
        """Build a PPTX of the employee charts and hand it to Gradio.

        Uses the yearly charts when ``month`` is "Yearly", otherwise the
        charts for that month.

        Returns:
            A ``gr.File`` pointing at the generated deck, or ``None`` when
            there are no charts to export or generation fails.
        """
        try:
            logger.info(f"Downloading employee PPTX for {year}, {month}")
            if month == "Yearly":
                _, _, _, employee_figs = generate_dashboard_charts(
                    patient_data_by_year_month, claim_data_by_year, mc_data_by_year, year, show_mc_pct, mc_sort_order)
                title = year
                label = f"Employee_Charts_{year}.pptx"
            else:
                _, employee_figs = generate_monthly_patient_charts(patient_data_by_year_month, year, month)
                month_name = datetime.strptime(month.split('-')[1], '%m').strftime('%B')
                title = f"{month_name} {year}"
                label = f"Employee_Charts_{month_name}_{year}.pptx"

            # Both generators return None on failure; without this guard an
            # empty deck would be written and offered for download.
            if not employee_figs or all(fig is None for fig in employee_figs):
                logger.warning("No employee charts available to export")
                return None

            pptx_path = charts_to_pptx([], employee_figs, title)
            if not pptx_path:
                logger.warning("Failed to generate employee PPTX")
                return None
            return gr.File(value=pptx_path, label=label)
        except Exception as e:
            logger.error(f"Error downloading employee PPTX: {str(e)}\nTrace: {traceback.format_exc()}")
            return None

    scrape_btn.click(
        fn=scrape_and_store,
        inputs=[url_input, user_id_input, password_input, show_mc_pct_checkbox, mc_sort_dropdown],
        outputs=[
            status_output, patient_state, claim_state, mc_state, year_dropdown, month_dropdown,
            prov_chart1, prov_chart2, prov_chart3, prov_chart4, prov_chart5,
            emp_chart1, emp_chart2, emp_chart3, emp_chart4, emp_chart5, emp_chart6
        ]
    )

    year_dropdown.change(
        fn=update_dashboard,
        inputs=[year_dropdown, month_dropdown, patient_state, claim_state, mc_state, show_mc_pct_checkbox, mc_sort_dropdown],
        outputs=[
            prov_chart1, prov_chart2, prov_chart3, prov_chart4, prov_chart5,
            emp_chart1, emp_chart2, emp_chart3, emp_chart4, emp_chart5, emp_chart6
        ]
    )

    month_dropdown.change(
        fn=update_dashboard,
        inputs=[year_dropdown, month_dropdown, patient_state, claim_state, mc_state, show_mc_pct_checkbox, mc_sort_dropdown],
        outputs=[
            prov_chart1, prov_chart2, prov_chart3, prov_chart4, prov_chart5,
            emp_chart1, emp_chart2, emp_chart3, emp_chart4, emp_chart5, emp_chart6
        ]
    )

    show_mc_pct_checkbox.change(
        fn=update_dashboard,
        inputs=[year_dropdown, month_dropdown, patient_state, claim_state, mc_state, show_mc_pct_checkbox, mc_sort_dropdown],
        outputs=[
            prov_chart1, prov_chart2, prov_chart3, prov_chart4, prov_chart5,
            emp_chart1, emp_chart2, emp_chart3, emp_chart4, emp_chart5, emp_chart6
        ]
    )

    mc_sort_dropdown.change(
        fn=update_dashboard,
        inputs=[year_dropdown, month_dropdown, patient_state, claim_state, mc_state, show_mc_pct_checkbox, mc_sort_dropdown],
        outputs=[
            prov_chart1, prov_chart2, prov_chart3, prov_chart4, prov_chart5,
            emp_chart1, emp_chart2, emp_chart3, emp_chart4, emp_chart5, emp_chart6
        ]
    )

    download_btn_prov.click(
        fn=download_provider_pptx,
        inputs=[year_dropdown, month_dropdown, patient_state, claim_state, mc_state, show_mc_pct_checkbox, mc_sort_dropdown],
        outputs=gr.File()
    )

    download_btn_emp.click(
        fn=download_employee_pptx,
        inputs=[year_dropdown, month_dropdown, patient_state, claim_state, mc_state, show_mc_pct_checkbox, mc_sort_dropdown],
        outputs=gr.File()
    )

demo.launch(share=True)

* Running on local URL:  http://127.0.0.1:7874


2025-03-06 13:52:15,184 - INFO - HTTP Request: GET http://127.0.0.1:7874/gradio_api/startup-events "HTTP/1.1 200 OK"
2025-03-06 13:52:15,240 - INFO - HTTP Request: HEAD http://127.0.0.1:7874/ "HTTP/1.1 200 OK"
2025-03-06 13:52:15,637 - INFO - HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
2025-03-06 13:52:16,568 - INFO - HTTP Request: GET https://api.gradio.app/v3/tunnel-request "HTTP/1.1 200 OK"


* Running on public URL: https://cd0562639799d0d835.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


2025-03-06 13:52:19,662 - INFO - HTTP Request: HEAD https://cd0562639799d0d835.gradio.live "HTTP/1.1 200 OK"




2025-03-06 13:53:32,462 - INFO - Starting scrape_and_store
2025-03-06 13:53:42,317 - INFO - Get LATEST edgedriver version for Edge 133.0.3065
2025-03-06 13:53:42,546 - INFO - Get LATEST edgedriver version for Edge 133.0.3065
2025-03-06 13:53:42,623 - INFO - Driver [C:\Users\DELL-INTERN-HR\.wdm\drivers\edgedriver\win64\133.0.3065.92\msedgedriver.exe] found in cache
2025-03-06 13:53:46,496 - INFO - Connecting to http://119.8.163.172:8080/ClaimEXMVR/Login/index.jsp
2025-03-06 13:53:48,546 - INFO - Clicking login image
2025-03-06 13:53:51,430 - INFO - Entering credentials
2025-03-06 13:53:52,839 - INFO - Clicking Continue button
2025-03-06 13:53:58,066 - INFO - Scraping Patient Analysis for 2024-01 (2024-01-01 to 2024-01-31)
2025-03-06 13:53:58,086 - INFO - Session keep-alive check successful
2025-03-06 13:53:58,883 - INFO - Selecting date 2024-01-01 for txtStartDate
2025-03-06 13:54:09,693 - INFO - Selecting date 2024-01-31 for txtEndDate
2025-03-06 13:54:34,199 - INFO - Extracting patien

In [1]:
import gradio as gr
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from selenium import webdriver
from selenium.webdriver.edge.service import Service
from selenium.webdriver.edge.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait, Select
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.microsoft import EdgeChromiumDriverManager
from datetime import datetime, timedelta
import time
import numpy as np
from selenium.common.exceptions import StaleElementReferenceException, TimeoutException, NoSuchElementException, WebDriverException
from io import BytesIO
from pptx import Presentation
from pptx.util import Inches
import tempfile
import logging
import traceback

# Set up logging: INFO and above, formatted as "<timestamp> - <level> - <message>".
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Module-level logger used by the functions below.
logger = logging.getLogger(__name__)

# --- Scraping Functions ---
def wait_for_element(driver, locator, timeout=20):
    """Block until the element at ``locator`` is clickable and return it.

    Args:
        driver: Selenium WebDriver to poll.
        locator: ``(By, value)`` tuple identifying the element.
        timeout: Maximum seconds to wait.

    Raises:
        TimeoutException: Logged and re-raised if the element is not
            clickable within ``timeout`` seconds.
    """
    waiter = WebDriverWait(driver, timeout)
    is_clickable = EC.element_to_be_clickable(locator)
    try:
        element = waiter.until(is_clickable)
    except TimeoutException as e:
        logger.error(f"Timeout waiting for element {locator}: {str(e)}")
        raise
    return element

def select_date_month_day(driver, date_str, date_input_id):
    """Pick the month and day of ``date_str`` in the date picker opened from an input.

    The year shown by the picker is left untouched.

    Args:
        driver: Selenium WebDriver on the page holding the date input.
        date_str: Date in 'YYYY-MM-DD' form.
        date_input_id: DOM id of the input that opens the picker.

    Raises:
        Exception: Any parsing or Selenium error is logged and re-raised.
    """
    try:
        date_to_select = datetime.strptime(date_str, '%Y-%m-%d')
        logger.info(f"Selecting date {date_str} for {date_input_id}")
        date_input = wait_for_element(driver, (By.ID, date_input_id))
        date_input.click()
        month_select = wait_for_element(driver, (By.CLASS_NAME, 'ui-datepicker-month'))
        # The leading "." keeps the XPath relative to month_select; a bare
        # "//option" searches the whole document and can match an option
        # belonging to some other <select> on the page.
        # Month option values are zero-based, hence month - 1.
        month_option = month_select.find_element(By.XPATH, f".//option[@value='{date_to_select.month - 1}']")
        month_option.click()
        day = date_to_select.day
        day_element = wait_for_element(driver, (By.XPATH, f"//td[@data-handler='selectDay']/a[text()='{day}']"))
        day_element.click()
        # Fixed pause after the pick — presumably lets the page react; TODO confirm.
        time.sleep(5)
    except Exception as e:
        logger.error(f"Error in select_date_month_day for {date_input_id}: {str(e)}")
        raise

def select_date(driver, date_str, date_input_id):
    """Pick the month, year and day of ``date_str`` in the date picker opened from an input.

    Args:
        driver: Selenium WebDriver on the page holding the date input.
        date_str: Date in 'YYYY-MM-DD' form.
        date_input_id: DOM id of the input that opens the picker.

    Raises:
        Exception: Any parsing or Selenium error is logged and re-raised.
    """
    try:
        date_to_select = datetime.strptime(date_str, '%Y-%m-%d')
        logger.info(f"Selecting date {date_str} for {date_input_id}")
        date_input = wait_for_element(driver, (By.ID, date_input_id))
        date_input.click()
        time.sleep(2)
        month_select = wait_for_element(driver, (By.CLASS_NAME, 'ui-datepicker-month'))
        # The leading "." keeps each XPath relative to its <select>; a bare
        # "//option" searches the whole document and can match an option
        # belonging to a different dropdown.
        # Month option values are zero-based, hence month - 1.
        month_option = month_select.find_element(By.XPATH, f".//option[@value='{date_to_select.month - 1}']")
        time.sleep(2)
        month_option.click()
        # Re-locate the year dropdown only after the month click.
        year_select = wait_for_element(driver, (By.CLASS_NAME, 'ui-datepicker-year'))
        year_option = year_select.find_element(By.XPATH, f".//option[@value='{date_to_select.year}']")
        year_option.click()
        day = date_to_select.day
        day_element = wait_for_element(driver, (By.XPATH, f"//td[@data-handler='selectDay']/a[text()='{day}']"))
        day_element.click()
        # Fixed pause after the pick — presumably lets the page react; TODO confirm.
        time.sleep(5)
    except Exception as e:
        logger.error(f"Error in select_date for {date_input_id}: {str(e)}")
        raise

def extract_grid_data_clm_summary(driver):
    """Read every page of the claim-summary grid into a list of row dicts.

    The page count comes from the ``sp_1_pjqgridClmSummbyProv`` element. A
    timeout while waiting for it means a single page is assumed; any other
    failure there returns an empty list. A failure on a page stops paging and
    returns what was collected so far; a failure on a single row skips that row.

    Args:
        driver: Selenium WebDriver positioned on the claim summary results.

    Returns:
        list[dict]: One dict per row with the keys ``'Provider Name'``,
        ``'No of Visits'``, ``'Total Claim'`` and ``'Total MC (Days)'``; all
        values are the raw cell text.
    """
    name_cell = "td[aria-describedby='jqgridClmSummbyProv_STR_ProvName']"
    visits_cell = "td[aria-describedby='jqgridClmSummbyProv_STR_NoOfVisit']"
    claim_cell = "td[aria-describedby='jqgridClmSummbyProv_STR_ClmAmt']"
    mc_cell = "td[aria-describedby='jqgridClmSummbyProv_STR_TotalMC']"
    next_icon = "div.btn.btn-sm.btn-default span.fa.fa-forward"

    data = []
    try:
        logger.info("Extracting claim summary data")
        pager_total = wait_for_element(driver, (By.ID, "sp_1_pjqgridClmSummbyProv"))
        total_pages = int(pager_total.text.strip())
        logger.info(f"Total pages: {total_pages}")
    except TimeoutException:
        logger.warning("No pagination element found, assuming single page")
        total_pages = 1
    except Exception as e:
        logger.error(f"Error getting total pages: {str(e)}")
        return data

    def read_row(row):
        # Cells are read in display order; the MC cell is optional and
        # falls back to '0' when the row has no such cell.
        record = {
            'Provider Name': row.find_element(By.CSS_SELECTOR, name_cell).text,
            'No of Visits': row.find_element(By.CSS_SELECTOR, visits_cell).text,
            'Total Claim': row.find_element(By.CSS_SELECTOR, claim_cell).text,
        }
        if row.find_elements(By.CSS_SELECTOR, mc_cell):
            record['Total MC (Days)'] = row.find_element(By.CSS_SELECTOR, mc_cell).text
        else:
            record['Total MC (Days)'] = '0'
        return record

    for page_index in range(total_pages):
        current_page = page_index + 1
        try:
            logger.info(f"Processing page {current_page}")
            time.sleep(3)
            driver.execute_script("window.scrollTo(0, 0);")
            # Called only for its wait: the grid must be clickable before rows are read.
            wait_for_element(driver, (By.ID, "jqgridClmSummbyProv"))
            rows = WebDriverWait(driver, 10).until(
                EC.presence_of_all_elements_located((By.CSS_SELECTOR, "tr.jqgrow"))
            )
            for row in rows:
                try:
                    data.append(read_row(row))
                except Exception as e:
                    logger.warning(f"Error extracting row on page {current_page}: {str(e)}")

            if current_page >= total_pages:
                continue

            # Page forward, then wait until the old first row is detached,
            # which signals that the grid has been re-rendered.
            next_button_div = wait_for_element(driver, (By.CSS_SELECTOR, next_icon), timeout=10)
            driver.execute_script("arguments[0].scrollIntoView(true);", next_button_div)
            next_button_div.click()
            WebDriverWait(driver, 15).until(EC.staleness_of(rows[0]))
        except Exception as e:
            logger.error(f"Error on page {current_page}: {str(e)}")
            break

    logger.info(f"Extracted {len(data)} rows from claim summary")
    return data

def extract_grid_data_patient_analysis(driver):
    """Read every page of the patient-analysis grid into a list of row dicts.

    Each page is attempted up to ``max_retries`` times. A page's rows are added
    to the result exactly once, even when the subsequent "next page" step has
    to be retried. Paging continues only after a page turn has been confirmed
    and stops when the next-page button is disabled or the retries run out.

    Args:
        driver: Selenium WebDriver positioned on the patient analysis results.

    Returns:
        list[dict]: One dict per row with the keys ``'Employee Name'``,
        ``'Employee No'``, ``'Division/Department'``, ``'Total Visit'``,
        ``'Total MC (Days)'``, ``'Total Claim (Own)'`` and
        ``'Total Claim (Dep)'``; all values are the raw cell text.
    """
    all_data = []
    max_retries = 5
    retry_delay = 5

    def extract_row(row, max_row_retries=3):
        """Return the row as a dict, or None once every row-level retry has failed."""
        for retry in range(max_row_retries):
            try:
                employee_name = row.find_element(By.CSS_SELECTOR, "td[aria-describedby='jqgridCorpMcAnalysis_MEM_NAME']").text
                employee_no = row.find_element(By.CSS_SELECTOR, "td[aria-describedby='jqgridCorpMcAnalysis_MEM_EMPID']").text
                division = row.find_element(By.CSS_SELECTOR, "td[aria-describedby='jqgridCorpMcAnalysis_MEM_EMPDIVISION']").text
                total_visit = row.find_element(By.CSS_SELECTOR, "td[aria-describedby='jqgridCorpMcAnalysis_totalVisit']").text
                total_mc = row.find_element(By.CSS_SELECTOR, "td[aria-describedby='jqgridCorpMcAnalysis_totalMC']").text
                total_claim_own = row.find_element(By.CSS_SELECTOR, "td[aria-describedby='jqgridCorpMcAnalysis_totalClaim_Own']").text
                total_claim_dep = row.find_element(By.CSS_SELECTOR, "td[aria-describedby='jqgridCorpMcAnalysis_totalClaim_Dep']").text
                return {
                    'Employee Name': employee_name, 'Employee No': employee_no, 'Division/Department': division,
                    'Total Visit': total_visit, 'Total MC (Days)': total_mc, 'Total Claim (Own)': total_claim_own,
                    'Total Claim (Dep)': total_claim_dep
                }
            except Exception as e:
                logger.warning(f"Error extracting patient row (Retry {retry + 1}/{max_row_retries}): {str(e)}")
                if retry + 1 == max_row_retries:
                    return None
                time.sleep(1)
        return None

    def next_is_disabled(button):
        """Report whether the pager button's class attribute contains 'disabled'."""
        # get_attribute returns None when the attribute is missing.
        return "disabled" in (button.get_attribute("class") or "")

    more_pages = True
    while more_pages:
        # Only a confirmed page turn re-arms the loop, so a failed page can
        # neither spin forever nor touch an element that was never located.
        more_pages = False
        page_collected = False
        for attempt in range(max_retries):
            try:
                logger.info(f"Extracting patient analysis data (Attempt {attempt + 1}/{max_retries})")
                driver.execute_script("window.scrollTo(0, 0);")
                # Called only for its wait: the grid must be clickable first.
                wait_for_element(driver, (By.ID, "jqgridCorpMcAnalysis"), timeout=30)

                if not driver.execute_script("return document.readyState === 'complete';"):
                    raise WebDriverException("Page not fully loaded; session may be unstable")

                rows = WebDriverWait(driver, 20).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, "tr.jqgrow")))
                logger.info(f"Found {len(rows)} rows on current page")

                # A retry of the paging step must not add this page's rows again.
                # NOTE(review): the timeout path refreshes the browser; whether the
                # grid keeps its page and filters after a refresh is not visible
                # here - confirm against the site.
                if not page_collected:
                    for i, row in enumerate(rows):
                        row_data = extract_row(row)
                        if row_data:
                            all_data.append(row_data)
                        else:
                            logger.warning(f"Skipping row {i + 1} after max retries")
                        time.sleep(0.2)
                    page_collected = True

                driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
                time.sleep(2)
                # Wait for the forward icon before looking up its pager button.
                wait_for_element(driver, (By.CSS_SELECTOR, "div.btn.btn-sm.btn-default span.fa.fa-forward"), timeout=15)
                parent_div = wait_for_element(driver, (By.ID, "next_pjqgridCorpMcAnalysis"), timeout=15)

                if next_is_disabled(parent_div):
                    logger.info("No more pages to scrape")
                    break

                pre_click_class = parent_div.get_attribute("class")
                logger.info(f"Pre-click button class: {pre_click_class}")
                try:
                    driver.execute_script("arguments[0].click();", parent_div)
                except Exception as e:
                    logger.warning(f"JavaScript click failed: {str(e)}, trying native click")
                    parent_div.click()

                # Two consecutive pages can hold the same number of rows, so a
                # row-count comparison cannot detect a page turn. Waiting for the
                # old first row to be detached matches the other grid extractors.
                WebDriverWait(driver, 30).until(
                    EC.staleness_of(rows[0]),
                    "Page did not update after clicking next"
                )

                # The new page still has to be scraped, even when it is the last
                # one and its next button is already disabled.
                more_pages = True
                time.sleep(3)
                break

            except TimeoutException as e:
                logger.error(f"Attempt {attempt + 1} failed: {str(e)}\nTrace: {traceback.format_exc()}")
                if attempt + 1 < max_retries:
                    logger.warning("Pagination failed; refreshing page and retrying")
                    driver.refresh()
                    time.sleep(5)
                    continue
                logger.error("Max retries reached. Aborting patient analysis extraction.")
                break
            except WebDriverException as e:
                logger.error(f"Attempt {attempt + 1} failed: {str(e)}\nTrace: {traceback.format_exc()}")
                if attempt + 1 < max_retries:
                    time.sleep(retry_delay)
                    continue
                break
            except Exception as e:
                logger.error(f"Unexpected error during patient analysis pagination: {str(e)}\nTrace: {traceback.format_exc()}")
                break

    logger.info(f"Extracted {len(all_data)} rows from patient analysis")
    return all_data

def extract_grid_data_mc(driver):
    """Read every page of the MC-by-provider grid into a list of row dicts.

    The page count comes from the ``sp_1_jqgrid`` element. A timeout while
    waiting for it means a single page is assumed; any other failure there
    returns an empty list. A failure on a page stops paging and returns what
    was collected so far; a failure on a single row skips that row.

    Args:
        driver: Selenium WebDriver positioned on the MC by Provider results.

    Returns:
        list[dict]: One dict per row with the keys ``'Provider'``,
        ``'Total MC Given'`` and ``'No. of Visit'``; values are the stripped
        cell text.
    """
    provider_cell = "td[aria-describedby='jqgrid_STR_ProvName']"
    mc_given_cell = "td[aria-describedby='jqgrid_STR_MC_Given_Count']"
    visits_cell = "td[aria-describedby='jqgrid_STR_VISITCount']"
    next_icon = "div.btn.btn-sm.btn-default span.fa.fa-forward"

    data = []
    try:
        logger.info("Extracting MC data")
        pager_total = wait_for_element(driver, (By.ID, "sp_1_jqgrid"))
        total_pages = int(pager_total.text.strip())
        logger.info(f"Total pages: {total_pages}")
    except TimeoutException:
        logger.warning("No pagination element found, assuming single page")
        total_pages = 1
    except Exception as e:
        logger.error(f"Error getting total pages: {str(e)}")
        return data

    def read_row(row):
        # Cells are read in the same order as the keys of the returned dict.
        return {
            'Provider': row.find_element(By.CSS_SELECTOR, provider_cell).text.strip(),
            'Total MC Given': row.find_element(By.CSS_SELECTOR, mc_given_cell).text.strip(),
            'No. of Visit': row.find_element(By.CSS_SELECTOR, visits_cell).text.strip(),
        }

    for page_index in range(total_pages):
        current_page = page_index + 1
        try:
            logger.info(f"Processing page {current_page}")
            time.sleep(3)
            driver.execute_script("window.scrollTo(0, 0);")
            # Called only for its wait: the grid must be clickable before rows are read.
            wait_for_element(driver, (By.ID, "jqgrid"))
            rows = WebDriverWait(driver, 10).until(
                EC.presence_of_all_elements_located((By.CSS_SELECTOR, "tr.jqgrow"))
            )
            for row in rows:
                try:
                    data.append(read_row(row))
                except Exception as e:
                    logger.warning(f"Error extracting MC row on page {current_page}: {str(e)}")

            if current_page >= total_pages:
                continue

            # Page forward, then wait until the old first row is detached,
            # which signals that the grid has been re-rendered.
            next_button_div = wait_for_element(driver, (By.CSS_SELECTOR, next_icon), timeout=10)
            driver.execute_script("arguments[0].scrollIntoView(true);", next_button_div)
            next_button_div.click()
            WebDriverWait(driver, 15).until(EC.staleness_of(rows[0]))
        except Exception as e:
            logger.error(f"Error on page {current_page}: {str(e)}")
            break

    logger.info(f"Extracted {len(data)} rows from MC data")
    return data

def _start_edge_driver(edge_options):
    """Launch Edge through webdriver-manager and apply a 60-second page-load timeout."""
    driver = webdriver.Edge(service=Service(EdgeChromiumDriverManager().install()), options=edge_options)
    driver.set_page_load_timeout(60)
    return driver


def _login_to_portal(driver, url, user_id, password):
    """Open ``url`` and submit the login form, finishing with the Continue button."""
    logger.info(f"Connecting to {url}")
    driver.get(url)

    logger.info("Clicking login image")
    image = wait_for_element(driver, (By.XPATH, "//img[@src='/ClaimEXMVR/Servlet_LoadImage?SFC=loadImage&imageName=icorporate.png']"))
    image.click()

    logger.info("Entering credentials")
    user_id_field = wait_for_element(driver, (By.NAME, "txtloginid"))
    user_id_field.send_keys(user_id)
    password_field = wait_for_element(driver, (By.ID, "inputpss"))
    password_field.send_keys(password)
    sign_button = wait_for_element(driver, (By.CSS_SELECTOR, "button.btn.btn-primary[type='submit']"))
    sign_button.click()

    logger.info("Clicking Continue button")
    continue_button = wait_for_element(driver, (By.XPATH, "//button[text()='Continue']"))
    continue_button.click()


def _rows_to_numeric_frame(rows, columns, numeric_cols):
    """Build a DataFrame with fixed ``columns`` and coerce ``numeric_cols`` to numbers.

    Naming the columns explicitly keeps them present when ``rows`` is empty, so
    the derived-column arithmetic that follows does not fail with ``KeyError``.
    """
    frame = pd.DataFrame(rows, columns=columns)
    for col in numeric_cols:
        frame[col] = pd.to_numeric(frame[col], errors='coerce')
    return frame


def _show_100_rows_per_page(driver, settle_seconds):
    """Switch the grid pager to 100 rows per page and pause for ``settle_seconds``."""
    dropdown = wait_for_element(driver, (By.CSS_SELECTOR, "select.ui-pg-selbox"))
    driver.execute_script("arguments[0].scrollIntoView(true);", dropdown)
    Select(dropdown).select_by_value("100")
    time.sleep(settle_seconds)


def _scrape_claim_summary(driver, start_date, end_date):
    """Run the Claim Summary by Providers report for a date range and return its DataFrame."""
    reg_claims_link = wait_for_element(driver, (By.XPATH, "//a[.//span[contains(text(), 'Registration') and contains(text(), 'Claims')]]"))
    reg_claims_link.click()
    providers_link = wait_for_element(driver, (By.XPATH, "//a[@href='#/Claim_Summary_by_Provider_Analysis'][span[text()=' Claim Summary by Providers ']]"))
    providers_link.click()
    time.sleep(5)
    select_date_month_day(driver, start_date, "txtFromDate")
    select_date_month_day(driver, end_date, "txtToDate")
    search_button = wait_for_element(driver, (By.ID, "btnSearch"))
    driver.execute_script("arguments[0].click();", search_button)
    time.sleep(10)
    _show_100_rows_per_page(driver, 5)
    claim_df = _rows_to_numeric_frame(
        extract_grid_data_clm_summary(driver),
        ['Provider Name', 'No of Visits', 'Total Claim', 'Total MC (Days)'],
        ['No of Visits', 'Total Claim', 'Total MC (Days)'],
    )
    claim_df['Avg Claim per Visit'] = claim_df['Total Claim'] / claim_df['No of Visits']
    return claim_df


def scrape_data(url, user_id, password):
    """Log in to the portal and scrape patient, MC and claim reports.

    Patient Analysis is scraped month by month, MC by Provider year by year,
    and Claim Summary by Providers for two fixed ranges stored under 2024 and
    2025. The WebDriver is always closed before returning.

    Args:
        url: Address of the portal login page.
        user_id: Login id typed into the ``txtloginid`` field.
        password: Password typed into the ``inputpss`` field.

    Returns:
        tuple: ``(patient_data_by_year_month, claim_data_by_year,
        mc_data_by_year, message)``. The first maps year -> ``"YYYY-MM"`` ->
        DataFrame; the next two map year -> DataFrame. On any error the three
        data items are ``None`` and ``message`` holds the error text and trace.
    """
    edge_options = Options()
    edge_options.add_argument("--disable-blink-features=AutomationControlled")
    edge_options.add_argument("--no-sandbox")
    edge_options.add_argument("--disable-dev-shm-usage")
    driver = _start_edge_driver(edge_options)
    start_year = 2024
    end_year = 2025
    current_date = datetime.now()
    patient_data_by_year_month, claim_data_by_year, mc_data_by_year = {}, {}, {}

    def restart_driver():
        """Replace the WebDriver with a fresh, logged-in session."""
        nonlocal driver
        try:
            driver.quit()
        except Exception as e:
            # Best effort: the old session may already be gone.
            logger.warning(f"Ignoring error while closing old WebDriver: {str(e)}")
        logger.info("Restarting WebDriver session")
        driver = _start_edge_driver(edge_options)
        _login_to_portal(driver, url, user_id, password)

    def keep_alive():
        """Probe the session with a trivial script and restart it when the probe fails."""
        try:
            driver.execute_script("return document.title;")
            logger.info("Session keep-alive check successful")
        except Exception as e:
            logger.warning(f"Session keep-alive failed: {str(e)}")
            restart_driver()

    try:
        _login_to_portal(driver, url, user_id, password)

        # Patient Analysis (Month by Month)
        for year in range(start_year, end_year + 1):
            patient_data_by_year_month[year] = {}
            # A finished year has all twelve months; otherwise stop at the current month.
            end_month = 12 if year < current_date.year else current_date.month
            for month in range(1, end_month + 1):
                start_date = f"{year}-{month:02d}-01"
                if month < 12:
                    month_end = datetime(year, month + 1, 1) - timedelta(days=1)
                else:
                    month_end = datetime(year, 12, 31)
                # Never ask for dates beyond today.
                end_date = min(month_end, current_date).strftime('%Y-%m-%d')

                logger.info(f"Scraping Patient Analysis for {year}-{month:02d} ({start_date} to {end_date})")
                keep_alive()
                productivity_link = wait_for_element(driver, (By.XPATH, "//a[span[text()='Productivity Reports']]"))
                productivity_link.click()
                patient_analysis_link = wait_for_element(driver, (By.XPATH, "//a[@href='#/Patient_Analysis_Report'][span[text()=' Patient Analysis Report ']]"))
                patient_analysis_link.click()
                select_date(driver, start_date, "txtStartDate")
                select_date(driver, end_date, "txtEndDate")
                search_button = wait_for_element(driver, (By.ID, "btnSearch"))
                search_button.click()
                time.sleep(5)
                _show_100_rows_per_page(driver, 15)
                patient_df = _rows_to_numeric_frame(
                    extract_grid_data_patient_analysis(driver),
                    ['Employee Name', 'Employee No', 'Division/Department', 'Total Visit',
                     'Total MC (Days)', 'Total Claim (Own)', 'Total Claim (Dep)'],
                    ['Total Visit', 'Total MC (Days)', 'Total Claim (Own)', 'Total Claim (Dep)'],
                )
                patient_df['Total Claim (Combined)'] = patient_df['Total Claim (Own)'] + patient_df['Total Claim (Dep)']
                patient_df['Avg Claim per Visit'] = patient_df['Total Claim (Combined)'] / patient_df['Total Visit']
                patient_df['Avg MC per Visit'] = patient_df['Total MC (Days)'] / patient_df['Total Visit']
                patient_df['Avg Claim per MC'] = patient_df['Total Claim (Combined)'] / patient_df['Total MC (Days)']
                patient_data_by_year_month[year][f"{year}-{month:02d}"] = patient_df
                logger.info(f"Completed Patient Analysis for {year}-{month:02d} with {len(patient_df)} rows")

        # MC Data (Yearly)
        for year in range(start_year, end_year + 1):
            start_date = f"{year}-01-01"
            # Whole year for finished years, up to today for the running year.
            end_date = min(datetime(year, 12, 31), current_date).strftime('%Y-%m-%d')
            logger.info(f"Scraping MC by Provider for {year} ({start_date} to {end_date})")
            keep_alive()
            productivity_link = wait_for_element(driver, (By.XPATH, "//a[span[text()='Productivity Reports']]"))
            productivity_link.click()
            mc_link = wait_for_element(driver, (By.XPATH, "//a[@href='#/MC_HealthCare_By_Provider'][span[text()=' MC by Provider ']]"))
            mc_link.click()
            time.sleep(2)
            select_date(driver, start_date, "txtStartDate")
            select_date(driver, end_date, "txtEndDate")
            search_button = wait_for_element(driver, (By.ID, "btnSearch"))
            search_button.click()
            time.sleep(10)
            _show_100_rows_per_page(driver, 10)
            mc_df = _rows_to_numeric_frame(
                extract_grid_data_mc(driver),
                ['Provider', 'Total MC Given', 'No. of Visit'],
                ['Total MC Given', 'No. of Visit'],
            )
            mc_df['% MC Given'] = (mc_df['Total MC Given'] / mc_df['No. of Visit']) * 100
            mc_data_by_year[year] = mc_df
            logger.info(f"Completed MC by Provider for {year} with {len(mc_df)} rows")

        # Claim Data (Yearly)
        # NOTE(review): select_date_month_day applies only the month and day of
        # these strings; confirm the two ranges below are the intended ones.
        start_date, end_date = "2024-01-01", "2025-01-01"
        logger.info(f"Scraping Claim Summary for 2024 ({start_date} to {end_date})")
        keep_alive()
        claim_df_2024 = _scrape_claim_summary(driver, start_date, end_date)
        claim_data_by_year[2024] = claim_df_2024
        logger.info(f"Completed Claim Summary for 2024 with {len(claim_df_2024)} rows")

        start_date, end_date = "2024-12-31", current_date.strftime('%Y-%m-%d')
        logger.info(f"Scraping Claim Summary for 2025 ({start_date} to {end_date})")
        keep_alive()
        claim_df_recent = _scrape_claim_summary(driver, start_date, end_date)
        claim_data_by_year[2025] = claim_df_recent
        logger.info(f"Completed Claim Summary for 2025 with {len(claim_df_recent)} rows")

        return patient_data_by_year_month, claim_data_by_year, mc_data_by_year, "Data scraped successfully!"
    except TimeoutException as e:
        error_msg = f"Timeout Error: {str(e)}\nTrace: {traceback.format_exc()}"
        logger.error(error_msg)
        return None, None, None, error_msg
    except WebDriverException as e:
        error_msg = f"WebDriver Error: {str(e)}\nTrace: {traceback.format_exc()}"
        logger.error(error_msg)
        return None, None, None, error_msg
    except Exception as e:
        error_msg = f"General Error: {str(e)}\nTrace: {traceback.format_exc()}"
        logger.error(error_msg)
        return None, None, None, error_msg
    finally:
        try:
            driver.quit()
            logger.info("WebDriver closed")
        except Exception as e:
            logger.error(f"Error closing WebDriver: {str(e)}")

# --- Plotting Functions ---
def _is_plottable(value):
    """Return True when ``value`` is a finite number (neither NaN nor +/-inf)."""
    return bool(pd.notna(value) and np.isfinite(value))


def _ranked_bar_chart(frame, value_col, name_col, *, title, value_label, name_label,
                      palette, label_fmt, label_offset, horizontal=True,
                      figsize=(12, 6), axis_fontsize=None, tick_fontsize=None):
    """Draw one annotated bar chart of ``value_col`` per ``name_col`` and return the figure.

    Horizontal charts put the value on the x axis; vertical charts put it on
    the y axis and rotate the category tick labels by 45 degrees. Each bar is
    annotated with ``label_fmt(value)`` placed ``label_offset`` past the bar
    end. Values that are NaN or infinite get no annotation, because they can
    be neither formatted as an integer nor positioned on the axis.
    """
    fig = plt.figure(figsize=figsize)
    if horizontal:
        x_col, y_col = value_col, name_col
    else:
        x_col, y_col = name_col, value_col
    ax = sns.barplot(data=frame, x=x_col, y=y_col, hue=name_col, palette=palette,
                     legend=False, edgecolor='black', linewidth=0.5)
    ax.set_title(title, pad=15)

    label_kwargs = {} if axis_fontsize is None else {'fontsize': axis_fontsize}
    if horizontal:
        ax.set_xlabel(value_label, **label_kwargs)
        ax.set_ylabel(name_label, **label_kwargs)
    else:
        ax.set_ylabel(value_label, **label_kwargs)
        ax.set_xlabel(name_label, **label_kwargs)
        tick_kwargs = {} if tick_fontsize is None else {'fontsize': tick_fontsize}
        plt.xticks(rotation=45, ha='right', **tick_kwargs)

    for position, value in enumerate(frame[value_col]):
        if not _is_plottable(value):
            continue
        text = label_fmt(value)
        if horizontal:
            ax.text(value + label_offset, position, text, va='center', fontsize=10, color='#333333')
        else:
            ax.text(position, value + label_offset, text, ha='center', fontsize=10, color='#333333')

    plt.tight_layout()
    # Closing detaches the figure from pyplot's registry; the Figure object
    # itself is still returned to the caller.
    plt.close(fig)
    return fig


def generate_dashboard_charts(patient_data_by_year_month, claim_data_by_year, mc_data_by_year, year, show_mc_pct=True, mc_sort_order="desc"):
    """Build the yearly provider and employee charts for ``year``.

    Monthly patient frames of the year are concatenated and aggregated per
    employee (totals summed, per-visit averages averaged across months).

    Args:
        patient_data_by_year_month: Mapping year -> month key -> patient DataFrame.
        claim_data_by_year: Mapping year -> claim summary DataFrame.
        mc_data_by_year: Mapping year -> MC-by-provider DataFrame.
        year: Year to chart; converted with ``int()``.
        show_mc_pct: When false, the "% MC Given" chart slot is ``None``.
        mc_sort_order: ``"asc"`` sorts the "% MC Given" chart ascending; any
            other value sorts it descending.

    Returns:
        tuple: ``(provider_images, employee_images, provider_charts,
        employee_charts)``. The image lists come from ``fig_to_image`` and the
        chart lists hold the matplotlib figures. Four ``None`` values are
        returned when data is missing or empty, or when any error occurs.
    """
    try:
        logger.info(f"Generating yearly dashboard charts for {year}")
        if not patient_data_by_year_month or not claim_data_by_year or not mc_data_by_year:
            logger.error("No data available to generate charts")
            return None, None, None, None

        year_int = int(year)
        patient_df_year = pd.concat(patient_data_by_year_month.get(year_int, {}).values(), ignore_index=True) if year_int in patient_data_by_year_month else pd.DataFrame()
        claim_df = claim_data_by_year.get(year_int, pd.DataFrame())
        mc_df = mc_data_by_year.get(year_int, pd.DataFrame())

        if patient_df_year.empty or claim_df.empty or mc_df.empty:
            logger.warning(f"Data for year {year} is empty: Patient: {patient_df_year.empty}, Claim: {claim_df.empty}, MC: {mc_df.empty}")
            return None, None, None, None

        # Collapse the monthly rows into one row per employee.
        patient_df_year = patient_df_year.groupby(['Employee Name', 'Employee No', 'Division/Department']).agg({
            'Total Visit': 'sum',
            'Total MC (Days)': 'sum',
            'Total Claim (Own)': 'sum',
            'Total Claim (Dep)': 'sum',
            'Total Claim (Combined)': 'sum',
            'Avg Claim per Visit': 'mean',
            'Avg MC per Visit': 'mean',
            'Avg Claim per MC': 'mean'
        }).reset_index()

        sns.set(style="whitegrid", palette="muted")
        plt.rcParams.update({
            'font.family': 'Arial', 'font.size': 12, 'axes.titlesize': 16,
            'axes.labelsize': 14, 'xtick.labelsize': 11, 'ytick.labelsize': 11,
            'axes.titleweight': 'bold', 'axes.linewidth': 1.5, 'grid.linestyle': ':',
            'grid.alpha': 0.5, 'figure.facecolor': '#f5f6f5', 'axes.facecolor': '#ffffff',
            'axes.edgecolor': '#333333', 'axes.labelcolor': '#333333', 'text.color': '#333333'
        })

        # Label formatters shared by the charts below.
        as_int = lambda v: f'{int(v)}'
        as_money = lambda v: f'{v:,.2f}'
        as_decimal = lambda v: f'{v:.2f}'
        as_percent = lambda v: f'{v:.1f}%'

        def top10(frame, column):
            return frame.sort_values(column, ascending=False).head(10)

        provider_charts = []
        employee_charts = []

        # --- Provider charts ---
        provider_charts.append(_ranked_bar_chart(
            top10(mc_df, 'No. of Visit'), 'No. of Visit', 'Provider',
            title=f'Top 10 Providers by Total Visits ({year})',
            value_label='Total Visits', name_label='Provider',
            palette='Blues_r', label_fmt=as_int, label_offset=0.5))

        provider_charts.append(_ranked_bar_chart(
            top10(mc_df, 'Total MC Given'), 'Total MC Given', 'Provider',
            title=f'Top 10 Providers by Total MC Given ({year})',
            value_label='Total MC (Days)', name_label='Provider',
            palette='Greens_r', label_fmt=as_int, label_offset=0.5))

        if show_mc_pct:
            # Providers that are top-10 by visits or by MC given, ranked by % MC.
            top_visits_provs = set(top10(mc_df, 'No. of Visit')['Provider'])
            top_mc_provs = set(top10(mc_df, 'Total MC Given')['Provider'])
            top_provs = top_visits_provs.union(top_mc_provs)
            top_prov_mc_pct = mc_df[mc_df['Provider'].isin(top_provs)].sort_values(
                '% MC Given', ascending=(mc_sort_order == "asc")).head(20)
            provider_charts.append(_ranked_bar_chart(
                top_prov_mc_pct, '% MC Given', 'Provider',
                title=f'Top 20 Providers by % MC Given ({year}) - Sorted {"Ascending" if mc_sort_order == "asc" else "Descending"}',
                value_label='% MC Given', name_label='Provider',
                palette='Purples_r', label_fmt=as_percent, label_offset=1,
                horizontal=False, figsize=(18, 9), axis_fontsize=14, tick_fontsize=11))
        else:
            # Keep the slot so chart positions stay stable for callers.
            provider_charts.append(None)

        provider_charts.append(_ranked_bar_chart(
            top10(claim_df, 'Total Claim'), 'Total Claim', 'Provider Name',
            title=f'Top 10 Providers by Total Claim ({year})',
            value_label='Total Claim ($)', name_label='Provider',
            palette='Oranges_r', label_fmt=as_money, label_offset=0.5))

        provider_charts.append(_ranked_bar_chart(
            top10(claim_df, 'Avg Claim per Visit'), 'Avg Claim per Visit', 'Provider Name',
            title=f'Top 10 Providers by Avg Claim per Visit ({year})',
            value_label='Avg Claim per Visit ($)', name_label='Provider',
            palette='Reds_r', label_fmt=as_decimal, label_offset=0.5, horizontal=False))

        # --- Employee charts ---
        employee_charts.append(_ranked_bar_chart(
            top10(patient_df_year, 'Total Visit'), 'Total Visit', 'Employee Name',
            title=f'Top 10 Employees by Total Visits ({year})',
            value_label='Total Visits', name_label='Employee',
            palette='Blues_r', label_fmt=as_int, label_offset=0.2))

        employee_charts.append(_ranked_bar_chart(
            top10(patient_df_year, 'Total Claim (Combined)'), 'Total Claim (Combined)', 'Employee Name',
            title=f'Top 10 Employees by Total Claim ({year})',
            value_label='Total Claim ($)', name_label='Employee',
            palette='Oranges_r', label_fmt=as_money, label_offset=1))

        employee_charts.append(_ranked_bar_chart(
            top10(patient_df_year, 'Avg Claim per Visit'), 'Avg Claim per Visit', 'Employee Name',
            title=f'Top 10 Employees by Avg Claim per Visit ({year})',
            value_label='Avg Claim per Visit ($)', name_label='Employee',
            palette='Reds_r', label_fmt=as_decimal, label_offset=0.5, horizontal=False))

        employee_charts.append(_ranked_bar_chart(
            top10(patient_df_year, 'Total MC (Days)'), 'Total MC (Days)', 'Employee Name',
            title=f'Top 10 Employees by Total MC ({year})',
            value_label='Total MC (Days)', name_label='Employee',
            palette='Greens_r', label_fmt=as_int, label_offset=0.2))

        employee_charts.append(_ranked_bar_chart(
            top10(patient_df_year, 'Avg MC per Visit'), 'Avg MC per Visit', 'Employee Name',
            title=f'Top 10 Employees by Avg MC per Visit ({year})',
            value_label='Avg MC per Visit (Days)', name_label='Employee',
            palette='Purples_r', label_fmt=as_decimal, label_offset=0.05, horizontal=False))

        # Claim share per division as a pie chart.
        fig = plt.figure(figsize=(10, 6))
        division_claims = patient_df_year.groupby('Division/Department')['Total Claim (Combined)'].sum()
        plt.pie(division_claims, labels=division_claims.index, autopct='%1.1f%%', colors=sns.color_palette('muted'),
                startangle=90, textprops={'fontsize': 11, 'color': '#333333'}, wedgeprops={'edgecolor': 'black', 'linewidth': 0.5})
        plt.title(f'Claim Distribution by Division ({year})', pad=15)
        plt.tight_layout()
        employee_charts.append(fig)
        plt.close(fig)

        provider_images = [fig_to_image(fig) if fig is not None else None for fig in provider_charts]
        employee_images = [fig_to_image(fig) for fig in employee_charts]

        logger.info(f"Generated {len(provider_charts)} provider charts and {len(employee_charts)} employee charts for year {year}")
        return provider_images, employee_images, provider_charts, employee_charts
    except Exception as e:
        logger.error(f"Error generating charts: {str(e)}\nTrace: {traceback.format_exc()}")
        return None, None, None, None

def generate_monthly_patient_charts(patient_data_by_year_month, year, month):
    """Render the six employee charts for one month of patient data.

    Args:
        patient_data_by_year_month: Mapping of ``int`` year to a mapping of
            month key to a patient ``DataFrame``.
        year: Year to chart; converted with ``int()`` for the lookup.
        month: Month key looked up under the year. Its second ``-``-separated
            field is parsed as a month number (``'%m'``) for the chart titles.

    Returns:
        ``(employee_images, employee_charts)``: the ``fig_to_image`` result
        for every figure plus the figures themselves, or ``(None, None)``
        when the month is missing, its frame is empty, or any step raises.
    """
    try:
        logger.info(f"Generating monthly patient charts for {year}-{month}")
        month_available = (
            bool(patient_data_by_year_month)
            and int(year) in patient_data_by_year_month
            and month in patient_data_by_year_month[int(year)]
        )
        if not month_available:
            logger.error(f"No patient data available for {year}-{month}")
            return None, None

        monthly_frame = patient_data_by_year_month[int(year)].get(month, pd.DataFrame())
        if monthly_frame.empty:
            logger.warning(f"Patient data for {year}-{month} is empty")
            return None, None

        month_number = month.split('-')[1]
        month_name = datetime.strptime(month_number, '%m').strftime('%B')

        # Global plot styling shared by every chart below.
        sns.set(style="whitegrid", palette="muted")
        plt.rcParams.update({
            'font.family': 'Arial', 'font.size': 12, 'axes.titlesize': 16,
            'axes.labelsize': 14, 'xtick.labelsize': 11, 'ytick.labelsize': 11,
            'axes.titleweight': 'bold', 'axes.linewidth': 1.5, 'grid.linestyle': ':',
            'grid.alpha': 0.5, 'figure.facecolor': '#f5f6f5', 'axes.facecolor': '#ffffff',
            'axes.edgecolor': '#333333', 'axes.labelcolor': '#333333', 'text.color': '#333333'
        })

        # One row per bar chart:
        # (metric column, palette, horizontal bars?, metric name used in the
        #  title, metric axis label, value-label offset, value-label formatter)
        bar_specs = [
            ('Total Visit', 'Blues_r', True, 'Total Visits', 'Total Visits',
             0.2, lambda v: f'{int(v)}'),
            ('Total Claim (Combined)', 'Oranges_r', True, 'Total Claim', 'Total Claim ($)',
             1, lambda v: f'{v:,.2f}'),
            ('Avg Claim per Visit', 'Reds_r', False, 'Avg Claim per Visit', 'Avg Claim per Visit ($)',
             0.5, lambda v: f'{v:.2f}'),
            ('Total MC (Days)', 'Greens_r', True, 'Total MC', 'Total MC (Days)',
             0.2, lambda v: f'{int(v)}'),
            ('Avg MC per Visit', 'Purples_r', False, 'Avg MC per Visit', 'Avg MC per Visit (Days)',
             0.05, lambda v: f'{v:.2f}'),
        ]

        employee_charts = []
        for column, palette, horizontal, metric_title, metric_label, offset, as_text in bar_specs:
            bar_fig = plt.figure(figsize=(12, 7))
            top_ten = monthly_frame.sort_values(column, ascending=False).head(10)

            # Horizontal charts put the metric on x; vertical ones put it on y.
            if horizontal:
                x_field, y_field = column, 'Employee Name'
            else:
                x_field, y_field = 'Employee Name', column
            ax = sns.barplot(data=top_ten, x=x_field, y=y_field, hue='Employee Name',
                             palette=palette, legend=False, edgecolor='black', linewidth=0.5)
            ax.set_title(f'Top 10 Employees by {metric_title} ({month_name} {year})', pad=15)

            if horizontal:
                ax.set_xlabel(metric_label)
                ax.set_ylabel('Employee')
            else:
                ax.set_ylabel(metric_label)
                ax.set_xlabel('Employee')
                # Employee names sit on the x axis here, so tilt them.
                plt.xticks(rotation=45, ha='right')

            # Print each bar's value just past the end of the bar.
            for position, value in enumerate(top_ten[column]):
                if horizontal:
                    ax.text(value + offset, position, as_text(value),
                            va='center', fontsize=10, color='#333333')
                else:
                    ax.text(position, value + offset, as_text(value),
                            ha='center', fontsize=10, color='#333333')

            plt.tight_layout()
            employee_charts.append(bar_fig)
            plt.close(bar_fig)

        # Sixth chart: share of combined claims per division.
        pie_fig = plt.figure(figsize=(10, 7))
        claims_by_division = monthly_frame.groupby('Division/Department')['Total Claim (Combined)'].sum()
        plt.pie(
            claims_by_division,
            labels=claims_by_division.index,
            autopct='%1.1f%%',
            colors=sns.color_palette('muted'),
            startangle=90,
            textprops={'fontsize': 11, 'color': '#333333'},
            wedgeprops={'edgecolor': 'black', 'linewidth': 0.5},
        )
        plt.title(f'Claim Distribution by Division ({month_name} {year})', pad=15)
        plt.tight_layout()
        employee_charts.append(pie_fig)
        plt.close(pie_fig)

        employee_images = [fig_to_image(chart) for chart in employee_charts]

        logger.info(f"Generated {len(employee_charts)} monthly patient charts for {month_name} {year}")
        return employee_images, employee_charts
    except Exception as e:
        logger.error(f"Error generating monthly patient charts: {str(e)}\nTrace: {traceback.format_exc()}")
        return None, None

def fig_to_image(fig):
    """Rasterise a Matplotlib figure into an RGBA image array.

    Args:
        fig: A figure whose canvas provides ``draw()`` and ``buffer_rgba()``,
            or ``None``.

    Returns:
        A ``(height, width, 4)`` ``numpy.uint8`` array, or ``None`` when
        *fig* is ``None`` or rendering fails (the error is logged).
    """
    if fig is None:
        return None
    try:
        fig.canvas.draw()
        # buffer_rgba() already carries its own (rows, cols, 4) shape. Taking
        # the shape from the buffer instead of get_width_height() keeps this
        # working when the canvas renders at a device pixel ratio other than
        # 1, where the logical size no longer matches the buffer size.
        # np.array copies, so the returned image does not alias the
        # renderer's buffer and is unaffected by later redraws.
        return np.array(fig.canvas.buffer_rgba(), dtype=np.uint8)
    except Exception as e:
        logger.error(f"Error converting figure to image: {str(e)}")
        return None

def _add_chart_slide(prs, slide_layout, section, charts, chart_titles, title):
    """Add one "<section> Insights" slide and place its charts in a 3-column grid.

    ``None`` entries in *charts* are skipped. Each remaining figure keeps the
    title of the slot it originally occupied, and a generic title is used
    when *chart_titles* is shorter than *charts*.
    """
    slide = prs.slides.add_slide(slide_layout)
    slide.shapes.title.text = f"{section} Insights ({title})"

    # Pair every figure with the title of its ORIGINAL slot before dropping
    # the None placeholders; indexing the titles by position among the
    # surviving figures would shift every title after a skipped chart.
    titled_charts = [
        (fig, chart_titles[slot] if slot < len(chart_titles) else f"Chart {slot + 1} ({title})")
        for slot, fig in enumerate(charts)
        if fig is not None
    ]
    if not titled_charts:
        logger.warning(f"No valid {section.lower()} charts to export")
        return

    charts_per_row = 3
    rows = (len(titled_charts) + charts_per_row - 1) // charts_per_row
    chart_width = Inches(3.8)
    # A single row gets taller pictures; multiple rows use a shorter height.
    chart_height = Inches(2.5) if rows > 1 else Inches(5)
    top_start = Inches(1.5)

    for idx, (fig, chart_title) in enumerate(titled_charts):
        row, col = divmod(idx, charts_per_row)
        left = Inches(0.5 + col * 4.5)
        top = top_start + row * (chart_height + Inches(0.2))
        img_stream = BytesIO()
        fig.savefig(img_stream, format='png', bbox_inches='tight', dpi=100)
        img_stream.seek(0)
        if img_stream.getvalue():
            slide.shapes.add_picture(img_stream, left, top, width=chart_width, height=chart_height)
            logger.info(f"Added chart {idx+1}: '{chart_title}' to {section} slide")
        else:
            logger.warning(f"Failed to save chart {idx+1}: '{chart_title}' - empty image stream")


def charts_to_pptx(provider_charts, employee_charts, title):
    """Export provider and/or employee figures to a temporary PPTX file.

    Args:
        provider_charts: Provider figures in dashboard order; entries may be
            ``None``. A falsy value means no provider slide is added.
        employee_charts: Employee figures in dashboard order; entries may be
            ``None``. A falsy value means no employee slide is added.
        title: Period label inserted into the slide heading and chart titles.

    Returns:
        Path of the saved ``.pptx`` temporary file (created with
        ``delete=False``, so it is not removed automatically), or ``None``
        if anything raises.
    """
    try:
        logger.info(f"Generating PPTX for {title}")
        prs = Presentation()
        slide_layout = prs.slide_layouts[5]
        prs.slide_width = Inches(13.33)
        prs.slide_height = Inches(7.5)

        provider_titles = [
            f"Top 10 Providers by Total Visits ({title})",
            f"Top 10 Providers by Total MC Given ({title})",
            f"Top 20 Providers by % MC Given ({title})",
            f"Top 10 Providers by Total Claim ({title})",
            f"Top 10 Providers by Avg Claim per Visit ({title})"
        ]

        employee_titles = [
            f"Top 10 Employees by Total Visits ({title})",
            f"Top 10 Employees by Total Claim ({title})",
            f"Top 10 Employees by Avg Claim per Visit ({title})",
            f"Top 10 Employees by Total MC ({title})",
            f"Top 10 Employees by Avg MC per Visit ({title})",
            f"Claim Distribution by Division ({title})"
        ]

        if provider_charts:
            _add_chart_slide(prs, slide_layout, "Provider", provider_charts, provider_titles, title)

        if employee_charts:
            _add_chart_slide(prs, slide_layout, "Employee", employee_charts, employee_titles, title)

        # Reserve a unique path, then save once the handle is closed so the
        # write does not depend on reopening a file that is still open.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.pptx') as tmp_file:
            tmp_file_path = tmp_file.name
        prs.save(tmp_file_path)

        logger.info(f"PPTX file saved to temporary path: {tmp_file_path}")
        return tmp_file_path
    except Exception as e:
        logger.error(f"Error generating PPTX: {str(e)}\nTrace: {traceback.format_exc()}")
        return None

# --- Gradio Interface ---
# Dashboard layout: credential inputs, view filters, per-session state holders
# and two tabs of chart images. The callbacks and their wiring follow inside
# the same Blocks context.
with gr.Blocks(title="Claims Analysis Dashboard", css="""
    body { background-color: #f5f6f5; }
    h1, h2 { color: #333333; font-family: Arial; }
""") as demo:
    gr.Markdown("# Claims Analysis Dashboard (2024 - Present)")
    
    # Inputs handed to scrape_and_store when "Submit" is clicked.
    with gr.Row():
        url_input = gr.Textbox(label="Website URL", placeholder="Enter URL here", lines=1)
        user_id_input = gr.Textbox(label="User ID", placeholder="Enter User ID", lines=1)
        password_input = gr.Textbox(label="Password", type="password", placeholder="Enter Password", lines=1)
    scrape_btn = gr.Button("Submit", variant="primary")
    
    # View filters. Both dropdowns start disabled with placeholder choices;
    # scrape_and_store replaces the choices and sets interactive=True on success.
    with gr.Row():
        year_dropdown = gr.Dropdown(
            label="Select Year to View Data",
            choices=["2024"],
            value="2024",
            allow_custom_value=False,
            interactive=False
        )
        month_dropdown = gr.Dropdown(
            label="Select Month (Optional)",
            # "Yearly" plus every "YYYY-MM" key from 2024-01 through 2035-12.
            choices=["Yearly"] + [f"{year}-{month:02d}" for year in range(2024, 2036) for month in range(1, 13)],
            value="Yearly",
            allow_custom_value=False,
            interactive=False
        )
        show_mc_pct_checkbox = gr.Checkbox(label="Show % MC Given Chart", value=True)
        mc_sort_dropdown = gr.Dropdown(
            label="Sort % MC Given",
            choices=["desc", "asc"],
            value="desc",
            allow_custom_value=False
        )
    
    status_output = gr.Textbox(label="Status", lines=5, interactive=False)
    # State holders for the three scraped datasets: written by scrape_and_store
    # and passed back into the dashboard and download callbacks.
    patient_state = gr.State()
    claim_state = gr.State()
    mc_state = gr.State()

    with gr.Tabs():
        with gr.TabItem("Provider Insights"):
            gr.Markdown("## Provider Insights Dashboard (Yearly)")
            with gr.Row():
                prov_chart1 = gr.Image(label="Total Visits by Providers", interactive=False)
                prov_chart2 = gr.Image(label="Total MC by Providers", interactive=False)
            with gr.Row():
                prov_chart3 = gr.Image(label="% MC Given by Providers", interactive=False, visible=True)
            with gr.Row():
                prov_chart4 = gr.Image(label="Total Claim by Providers", interactive=False)
                prov_chart5 = gr.Image(label="Average Claim per Visit by Providers", interactive=False)
            download_btn_prov = gr.Button("Download Provider Charts as PPTX")

        with gr.TabItem("Employee Insights"):
            gr.Markdown("## Employee Insights Dashboard")
            with gr.Row():
                emp_chart1 = gr.Image(label="Total Visits by Employees", interactive=False)
                emp_chart2 = gr.Image(label="Total Claim by Employees", interactive=False)
            with gr.Row():
                emp_chart3 = gr.Image(label="Average Claim per Visit by Employees", interactive=False)
                emp_chart4 = gr.Image(label="Total MC by Employees", interactive=False)
            with gr.Row():
                emp_chart5 = gr.Image(label="Average MC per Visit by Employees", interactive=False)
                emp_chart6 = gr.Image(label="Claim Distribution by Division", interactive=False)
            download_btn_emp = gr.Button("Download Employee Charts as PPTX")

    def scrape_and_store(url, user_id, password, show_mc_pct, mc_sort_order):
        """Scrape all datasets, keep them for the session, and render the first year's charts.

        Returns:
            A 17-item tuple in the order of the ``scrape_btn.click`` outputs:
            status text, patient/claim/MC datasets, year and month dropdown
            updates, five provider images and six employee images. Image
            slots are ``None`` when a chart could not be rendered.
        """
        def pad(images, size):
            # Normalise a None or short image list to exactly `size` slots so
            # a chart failure cannot raise while the outputs are unpacked.
            images = list(images) if images else []
            return (images + [None] * size)[:size]

        try:
            logger.info("Starting scrape_and_store")
            patient_data_by_year_month, claim_data_by_year, mc_data_by_year, status = scrape_data(url, user_id, password)
            if patient_data_by_year_month is None or claim_data_by_year is None or mc_data_by_year is None:
                logger.warning(f"Scraping failed: {status}")
                return (
                    status, None, None, None,
                    gr.update(choices=["2024"], value="2024", interactive=False),
                    gr.update(choices=["Yearly"], value="Yearly", interactive=False),
                    None, None, None, None, None, None, None, None, None, None, None
                )

            available_years = sorted(set(patient_data_by_year_month.keys()) | set(claim_data_by_year.keys()) | set(mc_data_by_year.keys()))
            year_choices = [str(year) for year in available_years]
            default_year = year_choices[0] if year_choices else "2024"
            month_choices = ["Yearly"] + [f"{year}-{month:02d}" for year in available_years for month in range(1, 13)]

            provider_images, employee_images, _, _ = generate_dashboard_charts(
                patient_data_by_year_month, claim_data_by_year, mc_data_by_year, default_year, show_mc_pct, mc_sort_order)
            if provider_images is None and employee_images is None:
                # Chart rendering failed, but the scrape itself succeeded:
                # keep the datasets so the user can pick another view instead
                # of having to scrape again.
                logger.warning(f"Chart generation failed for {default_year}; returning scraped data without charts")
                status = f"{status}\nWarning: chart generation failed for {default_year}; see logs."
            else:
                logger.info("Scraping and chart generation completed successfully")
            return (
                status, patient_data_by_year_month, claim_data_by_year, mc_data_by_year,
                gr.update(choices=year_choices, value=default_year, interactive=True),
                gr.update(choices=month_choices, value="Yearly", interactive=True),
                *pad(provider_images, 5),
                *pad(employee_images, 6),
            )
        except Exception as e:
            error_msg = f"Error in scrape_and_store: {str(e)}\nTrace: {traceback.format_exc()}"
            logger.error(error_msg)
            return error_msg, None, None, None, gr.update(), gr.update(), None, None, None, None, None, None, None, None, None, None, None

    def update_dashboard(year, month, patient_data_by_year_month, claim_data_by_year, mc_data_by_year, show_mc_pct, mc_sort_order):
        """Re-render the dashboard images for the selected year/month.

        Provider charts always come from ``generate_dashboard_charts``.
        Employee charts come from it too when *month* is ``"Yearly"``, and
        from ``generate_monthly_patient_charts`` otherwise.

        Returns:
            Eleven values (five provider images, then six employee images).
            Slots that could not be rendered are ``None``; everything is
            ``None`` when no data is loaded or an exception occurs.
        """
        def pad(images, size):
            # Normalise a None or short image list to exactly `size` slots.
            images = list(images) if images else []
            return (images + [None] * size)[:size]

        try:
            logger.info(f"Updating dashboard for {year}, {month}")
            if not patient_data_by_year_month or not claim_data_by_year or not mc_data_by_year:
                logger.warning("No data available for dashboard update")
                return [None] * 11

            if month == "Yearly":
                provider_images, employee_images, _, _ = generate_dashboard_charts(
                    patient_data_by_year_month, claim_data_by_year, mc_data_by_year, year, show_mc_pct, mc_sort_order)
            else:
                provider_images, _, _, _ = generate_dashboard_charts(
                    patient_data_by_year_month, claim_data_by_year, mc_data_by_year, year, show_mc_pct, mc_sort_order)
                employee_images, _ = generate_monthly_patient_charts(patient_data_by_year_month, year, month)

            # Both generators return None on failure. Pad each side on its
            # own so a failure on one tab does not blank the other.
            if provider_images is None or len(provider_images) < 5:
                logger.warning("Provider images incomplete, returning None for missing charts")
            if employee_images is None or len(employee_images) < 6:
                logger.warning("Employee images incomplete, returning None for missing charts")

            return (*pad(provider_images, 5), *pad(employee_images, 6))
        except Exception as e:
            logger.error(f"Error updating dashboard: {str(e)}\nTrace: {traceback.format_exc()}")
            return [None] * 11

    def download_provider_pptx(year, month, patient_data_by_year_month, claim_data_by_year, mc_data_by_year, show_mc_pct, mc_sort_order):
        """Build a PPTX deck of the provider charts for the current selection.

        Returns:
            A ``gr.File`` pointing at the generated deck, or ``None`` when no
            provider chart could be rendered or the export fails.
        """
        try:
            logger.info(f"Downloading provider PPTX for {year}, {month}")
            # Work out the slide title and file label once up front.
            # NOTE(review): the charts are generated from `year` only, yet a
            # month name is used in the title when a month is selected;
            # confirm this is intended.
            if month == "Yearly":
                title = year
                label = f"Provider_Charts_{year}.pptx"
            else:
                month_name = datetime.strptime(month.split('-')[1], '%m').strftime('%B')
                title = f"{month_name} {year}"
                label = f"Provider_Charts_{month_name}_{year}.pptx"

            _, _, provider_figs, _ = generate_dashboard_charts(
                patient_data_by_year_month, claim_data_by_year, mc_data_by_year, year, show_mc_pct, mc_sort_order)
            # generate_dashboard_charts returns None on failure; without this
            # guard charts_to_pptx would still save a deck with no slides.
            if not provider_figs or all(fig is None for fig in provider_figs):
                logger.warning("No provider charts available to export")
                return None

            pptx_path = charts_to_pptx(provider_figs, [], title)
            if not pptx_path:
                logger.warning("Failed to generate provider PPTX")
                return None
            return gr.File(value=pptx_path, label=label)
        except Exception as e:
            logger.error(f"Error downloading provider PPTX: {str(e)}\nTrace: {traceback.format_exc()}")
            return None

    def download_employee_pptx(year, month, patient_data_by_year_month, claim_data_by_year, mc_data_by_year, show_mc_pct, mc_sort_order):
        """Build a PPTX deck of the employee charts for the current selection.

        Yearly selections use ``generate_dashboard_charts``; a specific month
        uses ``generate_monthly_patient_charts``.

        Returns:
            A ``gr.File`` pointing at the generated deck, or ``None`` when no
            employee chart could be rendered or the export fails.
        """
        try:
            logger.info(f"Downloading employee PPTX for {year}, {month}")
            if month == "Yearly":
                _, _, _, employee_figs = generate_dashboard_charts(
                    patient_data_by_year_month, claim_data_by_year, mc_data_by_year, year, show_mc_pct, mc_sort_order)
                title = year
                label = f"Employee_Charts_{year}.pptx"
            else:
                _, employee_figs = generate_monthly_patient_charts(patient_data_by_year_month, year, month)
                month_name = datetime.strptime(month.split('-')[1], '%m').strftime('%B')
                title = f"{month_name} {year}"
                label = f"Employee_Charts_{month_name}_{year}.pptx"

            # Both generators return None on failure; without this guard
            # charts_to_pptx would still save a deck with no slides.
            if not employee_figs or all(fig is None for fig in employee_figs):
                logger.warning("No employee charts available to export")
                return None

            pptx_path = charts_to_pptx([], employee_figs, title)
            if not pptx_path:
                logger.warning("Failed to generate employee PPTX")
                return None
            return gr.File(value=pptx_path, label=label)
        except Exception as e:
            logger.error(f"Error downloading employee PPTX: {str(e)}\nTrace: {traceback.format_exc()}")
            return None

    # --- Event wiring ---
    # Every dashboard and download callback takes the same seven inputs, and
    # every dashboard refresh writes the same eleven image slots.
    dashboard_inputs = [
        year_dropdown, month_dropdown, patient_state, claim_state, mc_state,
        show_mc_pct_checkbox, mc_sort_dropdown,
    ]
    provider_slots = [prov_chart1, prov_chart2, prov_chart3, prov_chart4, prov_chart5]
    employee_slots = [emp_chart1, emp_chart2, emp_chart3, emp_chart4, emp_chart5, emp_chart6]

    # Submit: scrape, store the datasets, refresh the dropdowns and all charts.
    scrape_btn.click(
        fn=scrape_and_store,
        inputs=[url_input, user_id_input, password_input, show_mc_pct_checkbox, mc_sort_dropdown],
        outputs=(
            [status_output, patient_state, claim_state, mc_state, year_dropdown, month_dropdown]
            + provider_slots
            + employee_slots
        ),
    )

    # Changing any view filter re-renders the whole dashboard.
    for filter_control in (year_dropdown, month_dropdown, show_mc_pct_checkbox, mc_sort_dropdown):
        filter_control.change(
            fn=update_dashboard,
            inputs=list(dashboard_inputs),
            outputs=provider_slots + employee_slots,
        )

    # Each download button writes into its own newly created File component.
    for download_button, export_handler in (
        (download_btn_prov, download_provider_pptx),
        (download_btn_emp, download_employee_pptx),
    ):
        download_button.click(
            fn=export_handler,
            inputs=list(dashboard_inputs),
            outputs=gr.File(),
        )

# Start the app. share=True requests a public share link in addition to the
# local URL.
# NOTE(review): the page collects a user ID and password; confirm that
# exposing it through a public link is intended.
demo.launch(share=True)

2025-03-10 08:58:20,214 - INFO - HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"


* Running on local URL:  http://127.0.0.1:7860


2025-03-10 08:58:21,246 - INFO - HTTP Request: GET http://127.0.0.1:7860/gradio_api/startup-events "HTTP/1.1 200 OK"
2025-03-10 08:58:21,380 - INFO - HTTP Request: HEAD http://127.0.0.1:7860/ "HTTP/1.1 200 OK"
2025-03-10 08:58:23,005 - INFO - HTTP Request: GET https://api.gradio.app/v3/tunnel-request "HTTP/1.1 200 OK"


* Running on public URL: https://c71ee624386bd289ab.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


2025-03-10 08:58:27,002 - INFO - HTTP Request: HEAD https://c71ee624386bd289ab.gradio.live "HTTP/1.1 200 OK"




2025-03-10 09:01:57,810 - INFO - Starting scrape_and_store
2025-03-10 09:02:19,390 - INFO - Get LATEST edgedriver version for Edge 133.0.3065
2025-03-10 09:02:19,582 - INFO - Get LATEST edgedriver version for Edge 133.0.3065
2025-03-10 09:02:19,676 - INFO - Get LATEST edgedriver version for Edge 133.0.3065
2025-03-10 09:02:19,768 - INFO - About to download new driver from https://msedgedriver.azureedge.net/133.0.3065.92/edgedriver_win64.zip
2025-03-10 09:02:21,573 - INFO - Driver downloading response is 200
2025-03-10 09:02:27,702 - INFO - Get LATEST edgedriver version for Edge 133.0.3065
2025-03-10 09:02:29,419 - INFO - Driver has been saved in cache [C:\Users\DELL-INTERN-HR\.wdm\drivers\edgedriver\win64\133.0.3065.92]
2025-03-10 09:02:36,496 - INFO - Connecting to http://119.8.163.172:8080/ClaimEXMVR/Login/index.jsp
2025-03-10 09:02:39,015 - INFO - Clicking login image
2025-03-10 09:02:45,079 - INFO - Entering credentials
2025-03-10 09:02:46,914 - INFO - Clicking Continue button
2025

In [6]:
import gradio as gr
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from selenium import webdriver
from selenium.webdriver.edge.service import Service
from selenium.webdriver.edge.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait, Select
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.microsoft import EdgeChromiumDriverManager
from datetime import datetime, timedelta
import time
import numpy as np
from selenium.common.exceptions import StaleElementReferenceException, TimeoutException, NoSuchElementException, WebDriverException
from io import BytesIO
from pptx import Presentation
from pptx.util import Inches
import tempfile
import logging
import traceback

# Set up logging: INFO level with "timestamp - level - message" records, and
# one module-level logger used by every function below.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# --- Scraping Functions ---
def wait_for_element(driver, locator, timeout=20):
    """Wait until the element at *locator* is clickable and return it.

    Args:
        driver: WebDriver used for the wait.
        locator: ``(By.<strategy>, value)`` tuple identifying the element.
        timeout: Maximum number of seconds to wait.

    Raises:
        TimeoutException: Logged and re-raised when the element does not
            become clickable in time.
    """
    waiter = WebDriverWait(driver, timeout)
    is_clickable = EC.element_to_be_clickable(locator)
    try:
        element = waiter.until(is_clickable)
    except TimeoutException as timeout_error:
        logger.error(f"Timeout waiting for element {locator}: {str(timeout_error)}")
        raise
    return element

def select_date_month_day(driver, date_str, date_input_id):
    """Pick the month and day of *date_str* in the date picker opened by an input.

    The year dropdown is not touched; only the month and day are selected.

    Args:
        driver: WebDriver controlling the page.
        date_str: Date in ``YYYY-MM-DD`` format.
        date_input_id: DOM id of the input that opens the date picker.

    Raises:
        Exception: Any parsing or Selenium error is logged and re-raised.
    """
    try:
        date_to_select = datetime.strptime(date_str, '%Y-%m-%d')
        logger.info(f"Selecting date {date_str} for {date_input_id}")
        date_input = wait_for_element(driver, (By.ID, date_input_id))
        date_input.click()
        month_select = wait_for_element(driver, (By.CLASS_NAME, 'ui-datepicker-month'))
        # The leading "." keeps the XPath relative to the month <select>. A
        # bare "//option" is evaluated against the whole document and can
        # match an option of some other dropdown with the same value. The
        # option values are zero-based, hence month - 1.
        month_option = month_select.find_element(By.XPATH, f".//option[@value='{date_to_select.month - 1}']")
        month_option.click()
        day = date_to_select.day
        day_element = wait_for_element(driver, (By.XPATH, f"//td[@data-handler='selectDay']/a[text()='{day}']"))
        day_element.click()
        # Fixed pause after choosing the day; presumably gives the page time
        # to react to the new date (confirm before shortening).
        time.sleep(5)
    except Exception as e:
        logger.error(f"Error in select_date_month_day for {date_input_id}: {str(e)}")
        raise

def select_date(driver, date_str, date_input_id):
    """Pick the month, year and day of *date_str* in the date picker opened by an input.

    Args:
        driver: WebDriver controlling the page.
        date_str: Date in ``YYYY-MM-DD`` format.
        date_input_id: DOM id of the input that opens the date picker.

    Raises:
        Exception: Any parsing or Selenium error is logged and re-raised.
    """
    try:
        date_to_select = datetime.strptime(date_str, '%Y-%m-%d')
        logger.info(f"Selecting date {date_str} for {date_input_id}")
        date_input = wait_for_element(driver, (By.ID, date_input_id))
        date_input.click()
        time.sleep(2)
        month_select = wait_for_element(driver, (By.CLASS_NAME, 'ui-datepicker-month'))
        # The leading "." keeps the XPath relative to the month <select>. A
        # bare "//option" is evaluated against the whole document and can
        # match an option of some other dropdown with the same value. The
        # option values are zero-based, hence month - 1.
        month_option = month_select.find_element(By.XPATH, f".//option[@value='{date_to_select.month - 1}']")
        time.sleep(2)
        month_option.click()
        # The year dropdown is looked up only after the month click.
        year_select = wait_for_element(driver, (By.CLASS_NAME, 'ui-datepicker-year'))
        # Scoped to the year <select> for the same reason as the month lookup.
        year_option = year_select.find_element(By.XPATH, f".//option[@value='{date_to_select.year}']")
        year_option.click()
        day = date_to_select.day
        day_element = wait_for_element(driver, (By.XPATH, f"//td[@data-handler='selectDay']/a[text()='{day}']"))
        day_element.click()
        # Fixed pause after choosing the day; presumably gives the page time
        # to react to the new date (confirm before shortening).
        time.sleep(5)
    except Exception as e:
        logger.error(f"Error in select_date for {date_input_id}: {str(e)}")
        raise

def extract_grid_data_clm_summary(driver):
    """Collect every row of the claim-summary-by-provider grid across all pages.

    Args:
        driver: WebDriver positioned on the page that shows the grid.

    Returns:
        A list of dicts with the keys ``'Provider Name'``, ``'No of Visits'``,
        ``'Total Claim'`` and ``'Total MC (Days)'``, holding the cell text as
        scraped. The list is empty if the page count cannot be read, and
        partial if a page fails midway.
    """
    # Cells that must be present in every row, in output-key order.
    required_cells = {
        'Provider Name': "td[aria-describedby='jqgridClmSummbyProv_STR_ProvName']",
        'No of Visits': "td[aria-describedby='jqgridClmSummbyProv_STR_NoOfVisit']",
        'Total Claim': "td[aria-describedby='jqgridClmSummbyProv_STR_ClmAmt']",
    }
    # The MC cell may be absent from a row; it then defaults to '0'.
    total_mc_cell = "td[aria-describedby='jqgridClmSummbyProv_STR_TotalMC']"

    def read_row(row):
        # Build one output record from a grid row element.
        record = {
            field: row.find_element(By.CSS_SELECTOR, selector).text
            for field, selector in required_cells.items()
        }
        if row.find_elements(By.CSS_SELECTOR, total_mc_cell):
            record['Total MC (Days)'] = row.find_element(By.CSS_SELECTOR, total_mc_cell).text
        else:
            record['Total MC (Days)'] = '0'
        return record

    records = []
    try:
        logger.info("Extracting claim summary data")
        pager_total = wait_for_element(driver, (By.ID, "sp_1_pjqgridClmSummbyProv"))
        total_pages = int(pager_total.text.strip())
        logger.info(f"Total pages: {total_pages}")
    except TimeoutException:
        logger.warning("No pagination element found, assuming single page")
        total_pages = 1
    except Exception as e:
        logger.error(f"Error getting total pages: {str(e)}")
        return records

    for page_number in range(1, total_pages + 1):
        try:
            logger.info(f"Processing page {page_number}")
            time.sleep(3)
            driver.execute_script("window.scrollTo(0, 0);")
            # Called only for its wait: block until the grid is clickable.
            wait_for_element(driver, (By.ID, "jqgridClmSummbyProv"))
            row_locator = (By.CSS_SELECTOR, "tr.jqgrow")
            page_rows = WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located(row_locator))

            for grid_row in page_rows:
                try:
                    records.append(read_row(grid_row))
                except Exception as e:
                    # Skip a row that cannot be read and keep going.
                    logger.warning(f"Error extracting row on page {page_number}: {str(e)}")

            if page_number < total_pages:
                forward_icon = wait_for_element(
                    driver, (By.CSS_SELECTOR, "div.btn.btn-sm.btn-default span.fa.fa-forward"), timeout=10)
                driver.execute_script("arguments[0].scrollIntoView(true);", forward_icon)
                forward_icon.click()
                # The first row of the old page going stale signals that the
                # grid has been re-rendered.
                WebDriverWait(driver, 15).until(EC.staleness_of(page_rows[0]))
        except Exception as e:
            logger.error(f"Error on page {page_number}: {str(e)}")
            break

    logger.info(f"Extracted {len(records)} rows from claim summary")
    return records

def extract_grid_data_patient_analysis(driver):
    """Scrape every page of the Patient Analysis grid (``jqgridCorpMcAnalysis``).

    Each page is read row by row, then the pager's "next" button
    (``next_pjqgridCorpMcAnalysis``) is clicked until its class contains
    ``disabled``. A page is retried up to ``max_retries`` times; rows of a page
    are committed to the result exactly once, so a retry never duplicates them.

    Args:
        driver: Selenium WebDriver already showing the Patient Analysis results.

    Returns:
        list[dict]: one dict per grid row with the keys 'Employee Name',
        'Employee No', 'Division/Department', 'Total Visit', 'Total MC (Days)',
        'Total Claim (Own)' and 'Total Claim (Dep)'. Values are the raw cell
        text. The list is empty when nothing could be read.
    """
    all_data = []
    max_retries = 5
    retry_delay = 5
    row_locator = (By.CSS_SELECTOR, "tr.jqgrow")

    def extract_row(row, max_row_retries=3):
        """Return the row's cells as a dict, or None once every retry has failed."""
        for retry in range(max_row_retries):
            try:
                employee_name = row.find_element(By.CSS_SELECTOR, "td[aria-describedby='jqgridCorpMcAnalysis_MEM_NAME']").text
                employee_no = row.find_element(By.CSS_SELECTOR, "td[aria-describedby='jqgridCorpMcAnalysis_MEM_EMPID']").text
                division = row.find_element(By.CSS_SELECTOR, "td[aria-describedby='jqgridCorpMcAnalysis_MEM_EMPDIVISION']").text
                total_visit = row.find_element(By.CSS_SELECTOR, "td[aria-describedby='jqgridCorpMcAnalysis_totalVisit']").text
                total_mc = row.find_element(By.CSS_SELECTOR, "td[aria-describedby='jqgridCorpMcAnalysis_totalMC']").text
                total_claim_own = row.find_element(By.CSS_SELECTOR, "td[aria-describedby='jqgridCorpMcAnalysis_totalClaim_Own']").text
                total_claim_dep = row.find_element(By.CSS_SELECTOR, "td[aria-describedby='jqgridCorpMcAnalysis_totalClaim_Dep']").text
                return {
                    'Employee Name': employee_name, 'Employee No': employee_no, 'Division/Department': division,
                    'Total Visit': total_visit, 'Total MC (Days)': total_mc, 'Total Claim (Own)': total_claim_own,
                    'Total Claim (Dep)': total_claim_dep
                }
            except Exception as e:
                logger.warning(f"Error extracting patient row (Retry {retry + 1}/{max_row_retries}): {str(e)}")
                if retry + 1 == max_row_retries:
                    return None
                time.sleep(1)
        return None

    has_next_page = True
    while has_next_page:
        # Only a confirmed page turn re-arms the loop. Every failure path leaves
        # this False, so the loop can neither spin forever nor depend on the
        # state of a pager element that may never have been located.
        has_next_page = False
        # Rows read from the current page that are not yet in all_data.
        pending_rows = []

        for attempt in range(max_retries):
            try:
                logger.info(f"Extracting patient analysis data (Attempt {attempt + 1}/{max_retries})")
                driver.execute_script("window.scrollTo(0, 0);")
                wait_for_element(driver, (By.ID, "jqgridCorpMcAnalysis"), timeout=30)

                if not driver.execute_script("return document.readyState === 'complete';"):
                    raise WebDriverException("Page not fully loaded; session may be unstable")

                rows = WebDriverWait(driver, 20).until(EC.presence_of_all_elements_located(row_locator))
                logger.info(f"Found {len(rows)} rows on current page")

                page_rows = []
                for i, row in enumerate(rows):
                    row_data = extract_row(row)
                    if row_data:
                        page_rows.append(row_data)
                    else:
                        logger.warning(f"Skipping row {i + 1} after max retries")
                    time.sleep(0.2)
                # Replace (not extend) so a retried page is never counted twice.
                pending_rows = page_rows

                driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
                time.sleep(2)
                # Wait for the forward icon to be clickable before touching the pager button.
                wait_for_element(driver, (By.CSS_SELECTOR, "div.btn.btn-sm.btn-default span.fa.fa-forward"), timeout=15)
                parent_div = wait_for_element(driver, (By.ID, "next_pjqgridCorpMcAnalysis"), timeout=15)

                pre_click_class = parent_div.get_attribute("class") or ""
                if "disabled" in pre_click_class:
                    logger.info("No more pages to scrape")
                    all_data.extend(pending_rows)
                    pending_rows = []
                    break

                logger.info(f"Pre-click button class: {pre_click_class}")
                try:
                    driver.execute_script("arguments[0].click();", parent_div)
                except Exception as e:
                    logger.warning(f"JavaScript click failed: {str(e)}, trying native click")
                    parent_div.click()

                # A full page followed by another full page keeps the same row
                # count, so also accept the old first row going stale (the
                # signal the sibling extractors in this file rely on).
                first_row_gone = EC.staleness_of(rows[0])
                WebDriverWait(driver, 30).until(
                    lambda drv: first_row_gone(drv)
                    or len(drv.find_elements(*row_locator)) != len(rows)
                    or "disabled" in (parent_div.get_attribute("class") or ""),
                    "Page did not update after clicking next"
                )

                # The page turn is confirmed: commit this page and go read the
                # next one, even if that next page is the last (its "next"
                # button is then already disabled).
                all_data.extend(pending_rows)
                pending_rows = []
                has_next_page = True

                try:
                    post_click_class = parent_div.get_attribute("class")
                    logger.info(f"Post-click button class: {post_click_class}")
                except WebDriverException as e:
                    # Purely informational; must not trigger a retry of a page already committed.
                    logger.warning(f"Could not read post-click button class: {str(e)}")

                time.sleep(3)
                break

            except TimeoutException as e:
                logger.error(f"Attempt {attempt + 1} failed: {str(e)}\nTrace: {traceback.format_exc()}")
                if attempt + 1 < max_retries:
                    logger.warning("Pagination failed; refreshing page and retrying")
                    # NOTE(review): if a refresh resets the grid to its first page,
                    # earlier pages may be read again - confirm against the portal.
                    driver.refresh()
                    time.sleep(5)
                    continue
                logger.error("Max retries reached. Aborting patient analysis extraction.")
                break
            except WebDriverException as e:
                logger.error(f"Attempt {attempt + 1} failed: {str(e)}\nTrace: {traceback.format_exc()}")
                if attempt + 1 < max_retries:
                    time.sleep(retry_delay)
                    continue
                break
            except Exception as e:
                logger.error(f"Unexpected error during patient analysis pagination: {str(e)}\nTrace: {traceback.format_exc()}")
                break

        if pending_rows:
            # Pagination failed for good after this page was read: keep its rows, once.
            all_data.extend(pending_rows)

    logger.info(f"Extracted {len(all_data)} rows from patient analysis")
    return all_data

def extract_grid_data_mc(driver):
    """Collect the rows of the "MC by Provider" grid (``jqgrid``) across all pages.

    The page count is read from the pager element ``sp_1_jqgrid``; when that
    element never becomes available a single page is assumed. Any other failure
    while reading the page count returns an empty list.

    Args:
        driver: Selenium WebDriver already showing the MC by Provider results.

    Returns:
        list[dict]: one dict per row with the keys 'Provider', 'Total MC Given'
        and 'No. of Visit', holding the stripped cell text.
    """
    data = []
    try:
        logger.info("Extracting MC data")
        pager_total = wait_for_element(driver, (By.ID, "sp_1_jqgrid"))
        total_pages = int(pager_total.text.strip())
        logger.info(f"Total pages: {total_pages}")
    except TimeoutException:
        logger.warning("No pagination element found, assuming single page")
        total_pages = 1
    except Exception as e:
        logger.error(f"Error getting total pages: {str(e)}")
        return data

    # Output key -> CSS selector of the cell holding that value, in read order.
    cell_selectors = (
        ('Provider', "td[aria-describedby='jqgrid_STR_ProvName']"),
        ('Total MC Given', "td[aria-describedby='jqgrid_STR_MC_Given_Count']"),
        ('No. of Visit', "td[aria-describedby='jqgrid_STR_VISITCount']"),
    )
    row_locator = (By.CSS_SELECTOR, "tr.jqgrow")
    forward_locator = (By.CSS_SELECTOR, "div.btn.btn-sm.btn-default span.fa.fa-forward")

    for page_no in range(1, total_pages + 1):
        try:
            logger.info(f"Processing page {page_no}")
            time.sleep(3)
            driver.execute_script("window.scrollTo(0, 0);")
            # Block until the grid itself is available before looking for rows.
            wait_for_element(driver, (By.ID, "jqgrid"))
            page_rows = WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located(row_locator))

            for grid_row in page_rows:
                try:
                    record = {
                        key: grid_row.find_element(By.CSS_SELECTOR, selector).text.strip()
                        for key, selector in cell_selectors
                    }
                except Exception as e:
                    logger.warning(f"Error extracting MC row on page {page_no}: {str(e)}")
                    continue
                data.append(record)

            if page_no >= total_pages:
                continue

            # Advance the pager, then wait for the old first row to be detached.
            forward_icon = wait_for_element(driver, forward_locator, timeout=10)
            driver.execute_script("arguments[0].scrollIntoView(true);", forward_icon)
            forward_icon.click()
            WebDriverWait(driver, 15).until(EC.staleness_of(page_rows[0]))
        except Exception as e:
            logger.error(f"Error on page {page_no}: {str(e)}")
            break

    logger.info(f"Extracted {len(data)} rows from MC data")
    return data

def scrape_data(url, user_id, password):
    """Log in to the portal at ``url`` and scrape the three reports.

    Scrapes, starting with 2024 and up to the current date:
      * Patient Analysis, one query per calendar month;
      * MC by Provider, one query per year;
      * Claim Summary by Providers, one query per year.

    Args:
        url: Address of the portal landing page.
        user_id: Login id typed into the ``txtloginid`` field.
        password: Password typed into the ``inputpss`` field.

    Returns:
        tuple: ``(patient_data_by_year_month, claim_data_by_year,
        mc_data_by_year, message)`` where the first element maps
        ``year -> {"YYYY-MM": DataFrame}`` and the next two map
        ``year -> DataFrame``. On any error the three data elements are
        ``None`` and ``message`` carries the error text and traceback.

    Raises:
        Exception: whatever the initial WebDriver start-up raises; that happens
            before the error-handling ``try`` block.
    """
    edge_options = Options()
    edge_options.add_argument("--disable-blink-features=AutomationControlled")
    edge_options.add_argument("--no-sandbox")
    edge_options.add_argument("--disable-dev-shm-usage")

    def create_driver():
        """Start a fresh Edge session with the shared options and page-load timeout."""
        new_driver = webdriver.Edge(service=Service(EdgeChromiumDriverManager().install()), options=edge_options)
        new_driver.set_page_load_timeout(60)
        return new_driver

    driver = create_driver()
    start_year = 2024
    current_date = datetime.now()
    today_str = current_date.strftime('%Y-%m-%d')
    patient_data_by_year_month, claim_data_by_year, mc_data_by_year = {}, {}, {}

    def login():
        """Open the portal and sign in with the supplied credentials."""
        logger.info(f"Connecting to {url}")
        driver.get(url)

        logger.info("Clicking login image")
        wait_for_element(driver, (By.XPATH, "//img[@src='/ClaimEXMVR/Servlet_LoadImage?SFC=loadImage&imageName=icorporate.png']")).click()

        logger.info("Entering credentials")
        wait_for_element(driver, (By.NAME, "txtloginid")).send_keys(user_id)
        wait_for_element(driver, (By.ID, "inputpss")).send_keys(password)
        wait_for_element(driver, (By.CSS_SELECTOR, "button.btn.btn-primary[type='submit']")).click()

        logger.info("Clicking Continue button")
        wait_for_element(driver, (By.XPATH, "//button[text()='Continue']")).click()

    def restart_driver():
        """Replace a dead session with a new, logged-in one."""
        nonlocal driver
        try:
            driver.quit()
        except Exception as e:
            # The old session is usually already gone; nothing more to clean up.
            logger.warning(f"Ignoring error while closing old WebDriver: {str(e)}")
        logger.info("Restarting WebDriver session")
        driver = create_driver()
        login()

    def keep_alive():
        """Probe the session with a trivial script and restart it if the probe fails."""
        try:
            driver.execute_script("return document.title;")
            logger.info("Session keep-alive check successful")
        except Exception as e:
            logger.warning(f"Session keep-alive failed: {str(e)}")
            restart_driver()

    def yearly_range(year):
        """Return (start, end) dates for a yearly report; the current year ends today."""
        end = f"{year}-12-31" if year < current_date.year else today_str
        return f"{year}-01-01", end

    def show_100_rows(settle_seconds):
        """Switch the grid pager to 100 rows per page and wait for the reload."""
        dropdown = wait_for_element(driver, (By.CSS_SELECTOR, "select.ui-pg-selbox"))
        driver.execute_script("arguments[0].scrollIntoView(true);", dropdown)
        Select(dropdown).select_by_value("100")
        time.sleep(settle_seconds)

    def build_frame(records, columns, numeric_cols):
        """Build a DataFrame with a fixed schema so an empty scrape still has its columns."""
        frame = pd.DataFrame(records, columns=columns)
        # NOTE(review): values with thousands separators would be coerced to NaN
        # here - confirm how the portal formats amounts.
        for col in numeric_cols:
            frame[col] = pd.to_numeric(frame[col], errors='coerce')
        return frame

    try:
        login()

        # Patient Analysis (Month by Month)
        for year in range(start_year, current_date.year + 1):  # Up to current year
            patient_data_by_year_month[year] = {}
            end_month = 12 if year < current_date.year else current_date.month
            for month in range(1, end_month + 1):
                start_date = f"{year}-{month:02d}-01"
                if month < 12:
                    month_end = datetime(year, month + 1, 1) - timedelta(days=1)
                else:
                    month_end = datetime(year, 12, 31)
                month_is_complete = year < current_date.year or (year == current_date.year and month < current_date.month)
                # The running month is queried only up to today.
                end_date = month_end.strftime('%Y-%m-%d') if month_is_complete else today_str

                logger.info(f"Scraping Patient Analysis for {year}-{month:02d} ({start_date} to {end_date})")
                keep_alive()
                wait_for_element(driver, (By.XPATH, "//a[span[text()='Productivity Reports']]")).click()
                wait_for_element(driver, (By.XPATH, "//a[@href='#/Patient_Analysis_Report'][span[text()=' Patient Analysis Report ']]")).click()
                select_date(driver, start_date, "txtStartDate")
                select_date(driver, end_date, "txtEndDate")
                wait_for_element(driver, (By.ID, "btnSearch")).click()
                time.sleep(5)
                show_100_rows(settle_seconds=15)

                patient_df = build_frame(
                    extract_grid_data_patient_analysis(driver),
                    columns=['Employee Name', 'Employee No', 'Division/Department', 'Total Visit',
                             'Total MC (Days)', 'Total Claim (Own)', 'Total Claim (Dep)'],
                    numeric_cols=['Total Visit', 'Total MC (Days)', 'Total Claim (Own)', 'Total Claim (Dep)'],
                )
                patient_df['Total Claim (Combined)'] = patient_df['Total Claim (Own)'] + patient_df['Total Claim (Dep)']
                patient_df['Avg Claim per Visit'] = patient_df['Total Claim (Combined)'] / patient_df['Total Visit']
                patient_df['Avg MC per Visit'] = patient_df['Total MC (Days)'] / patient_df['Total Visit']
                patient_df['Avg Claim per MC'] = patient_df['Total Claim (Combined)'] / patient_df['Total MC (Days)']
                patient_data_by_year_month[year][f"{year}-{month:02d}"] = patient_df
                logger.info(f"Completed Patient Analysis for {year}-{month:02d} with {len(patient_df)} rows")

        # MC Data (Yearly)
        for year in range(start_year, current_date.year + 1):
            start_date, end_date = yearly_range(year)
            logger.info(f"Scraping MC by Provider for {year} ({start_date} to {end_date})")
            keep_alive()
            wait_for_element(driver, (By.XPATH, "//a[span[text()='Productivity Reports']]")).click()
            wait_for_element(driver, (By.XPATH, "//a[@href='#/MC_HealthCare_By_Provider'][span[text()=' MC by Provider ']]")).click()
            time.sleep(2)
            select_date(driver, start_date, "txtStartDate")
            select_date(driver, end_date, "txtEndDate")
            wait_for_element(driver, (By.ID, "btnSearch")).click()
            time.sleep(10)
            show_100_rows(settle_seconds=10)

            mc_df = build_frame(
                extract_grid_data_mc(driver),
                columns=['Provider', 'Total MC Given', 'No. of Visit'],
                numeric_cols=['Total MC Given', 'No. of Visit'],
            )
            mc_df['% MC Given'] = (mc_df['Total MC Given'] / mc_df['No. of Visit']) * 100
            mc_data_by_year[year] = mc_df
            logger.info(f"Completed MC by Provider for {year} with {len(mc_df)} rows")

        # Claim Data (Yearly)
        for year in range(start_year, current_date.year + 1):
            start_date, end_date = yearly_range(year)
            logger.info(f"Scraping Claim Summary for {year} ({start_date} to {end_date})")
            keep_alive()
            wait_for_element(driver, (By.XPATH, "//a[.//span[contains(text(), 'Registration') and contains(text(), 'Claims')]]")).click()
            wait_for_element(driver, (By.XPATH, "//a[@href='#/Claim_Summary_by_Provider_Analysis'][span[text()=' Claim Summary by Providers ']]")).click()
            time.sleep(5)
            select_date_month_day(driver, start_date, "txtFromDate")
            select_date_month_day(driver, end_date, "txtToDate")
            search_button = wait_for_element(driver, (By.ID, "btnSearch"))
            # This report's search button is clicked through JavaScript rather than natively.
            driver.execute_script("arguments[0].click();", search_button)
            time.sleep(10)
            show_100_rows(settle_seconds=5)

            claim_df = build_frame(
                extract_grid_data_clm_summary(driver),
                columns=['Provider Name', 'No of Visits', 'Total Claim', 'Total MC (Days)'],
                numeric_cols=['No of Visits', 'Total Claim', 'Total MC (Days)'],
            )
            claim_df['Avg Claim per Visit'] = claim_df['Total Claim'] / claim_df['No of Visits']
            claim_data_by_year[year] = claim_df
            logger.info(f"Completed Claim Summary for {year} with {len(claim_df)} rows")

        return patient_data_by_year_month, claim_data_by_year, mc_data_by_year, "Data scraped successfully!"
    except TimeoutException as e:
        error_msg = f"Timeout Error: {str(e)}\nTrace: {traceback.format_exc()}"
        logger.error(error_msg)
        return None, None, None, error_msg
    except WebDriverException as e:
        error_msg = f"WebDriver Error: {str(e)}\nTrace: {traceback.format_exc()}"
        logger.error(error_msg)
        return None, None, None, error_msg
    except Exception as e:
        error_msg = f"General Error: {str(e)}\nTrace: {traceback.format_exc()}"
        logger.error(error_msg)
        return None, None, None, error_msg
    finally:
        # Always release the browser, whichever session is current at this point.
        try:
            driver.quit()
            logger.info("WebDriver closed")
        except Exception as e:
            logger.error(f"Error closing WebDriver: {str(e)}")

# --- Plotting Functions ---
def create_placeholder_chart(title, message="No data available"):
    """Return a 12x6 figure that shows only ``title`` and a centred grey ``message``.

    Args:
        title: Text used as the figure title.
        message: Text drawn in the middle of the otherwise empty axes.

    Returns:
        The matplotlib figure. It is closed in pyplot before being returned,
        like every real chart built in this file, so repeated calls do not pile
        up open figures; the returned object itself remains usable.
    """
    fig = plt.figure(figsize=(12, 6))
    plt.text(0.5, 0.5, message, ha='center', va='center', fontsize=14, color='gray')
    plt.title(title, pad=15)
    plt.axis('off')
    # Deregister from pyplot's figure manager; otherwise each placeholder leaks.
    plt.close(fig)
    return fig

def _finite_top_n(df, value_col, n=10, ascending=False):
    """Return the first ``n`` rows of ``df`` ranked by ``value_col``, skipping NaN/inf values."""
    values = pd.to_numeric(df[value_col], errors='coerce').astype(float)
    return df[np.isfinite(values)].sort_values(value_col, ascending=ascending).head(n)


def _ranked_bar_chart(df, value_col, label_col, *, title, value_label, category_label, palette,
                      horizontal, label_offset, label_format, figsize=(12, 6)):
    """Draw one bar chart of ``value_col`` per ``label_col`` and return the closed figure.

    ``horizontal`` puts the values on the x axis; otherwise the categories go on
    the x axis with rotated tick labels. Each bar gets a text label produced by
    ``label_format(value)``, shifted by ``label_offset`` along the value axis.
    """
    fig = plt.figure(figsize=figsize)
    if horizontal:
        ax = sns.barplot(data=df, x=value_col, y=label_col, hue=label_col, palette=palette,
                         legend=False, edgecolor='black', linewidth=0.5)
        ax.set_title(title, pad=15)
        ax.set_xlabel(value_label)
        ax.set_ylabel(category_label)
        for i, v in enumerate(df[value_col]):
            ax.text(v + label_offset, i, label_format(v), va='center', fontsize=10, color='#333333')
    else:
        ax = sns.barplot(data=df, x=label_col, y=value_col, hue=label_col, palette=palette,
                         legend=False, edgecolor='black', linewidth=0.5)
        ax.set_title(title, pad=15)
        ax.set_ylabel(value_label)
        ax.set_xlabel(category_label)
        plt.xticks(rotation=45, ha='right')
        for i, v in enumerate(df[value_col]):
            ax.text(i, v + label_offset, label_format(v), ha='center', fontsize=10, color='#333333')
    plt.tight_layout()
    plt.close(fig)
    return fig


def generate_dashboard_charts(patient_data_by_year_month, claim_data_by_year, mc_data_by_year, year, show_mc_pct=True, mc_sort_order="desc"):
    """Build the yearly provider and employee charts for ``year``.

    Args:
        patient_data_by_year_month: ``{year: {"YYYY-MM": DataFrame}}`` of monthly
            patient analysis frames; the months of ``year`` are combined and
            aggregated per employee.
        claim_data_by_year: ``{year: DataFrame}`` of claim summaries per provider.
        mc_data_by_year: ``{year: DataFrame}`` of MC counts per provider.
        year: Year to chart; anything accepted by ``int()``.
        show_mc_pct: When False the "% MC Given" chart slot is filled with None.
        mc_sort_order: ``"asc"`` sorts the "% MC Given" chart ascending; any
            other value sorts it descending.

    Returns:
        tuple: ``(provider_images, employee_images, provider_charts,
        employee_charts)``. There are 5 provider entries and 6 employee entries;
        a placeholder figure stands in for any chart without usable data. If an
        input mapping is empty or an error occurs, the result is
        ``([None] * 5, [None] * 6, [], [])``.
    """
    try:
        logger.info(f"Generating yearly dashboard charts for {year}")
        if not patient_data_by_year_month or not claim_data_by_year or not mc_data_by_year:
            logger.error("No data available to generate charts")
            return [None] * 5, [None] * 6, [], []

        year_int = int(year)
        # pd.concat raises on an empty sequence, so only concatenate when some
        # month actually holds rows.
        monthly_frames = [frame for frame in patient_data_by_year_month.get(year_int, {}).values() if not frame.empty]
        patient_df_year = pd.concat(monthly_frames, ignore_index=True) if monthly_frames else pd.DataFrame()
        claim_df = claim_data_by_year.get(year_int, pd.DataFrame())
        mc_df = mc_data_by_year.get(year_int, pd.DataFrame())

        logger.info(f"Data sizes for {year}: Patient={len(patient_df_year)}, Claim={len(claim_df)}, MC={len(mc_df)}")

        if not patient_df_year.empty:
            patient_df_year = patient_df_year.groupby(['Employee Name', 'Employee No', 'Division/Department']).agg({
                'Total Visit': 'sum', 'Total MC (Days)': 'sum', 'Total Claim (Own)': 'sum', 'Total Claim (Dep)': 'sum',
                'Total Claim (Combined)': 'sum', 'Avg Claim per Visit': 'mean', 'Avg MC per Visit': 'mean', 'Avg Claim per MC': 'mean'
            }).reset_index()

        sns.set(style="whitegrid", palette="muted")
        plt.rcParams.update({
            'font.family': 'Arial', 'font.size': 12, 'axes.titlesize': 16, 'axes.labelsize': 14, 'xtick.labelsize': 11, 'ytick.labelsize': 11,
            'axes.titleweight': 'bold', 'axes.linewidth': 1.5, 'grid.linestyle': ':', 'grid.alpha': 0.5, 'figure.facecolor': '#f5f6f5',
            'axes.facecolor': '#ffffff', 'axes.edgecolor': '#333333', 'axes.labelcolor': '#333333', 'text.color': '#333333'
        })

        def placeholder(title):
            """Placeholder figure, closed in pyplot like the real charts."""
            fig = create_placeholder_chart(title)
            plt.close(fig)
            return fig

        def ranked_chart(df, value_col, label_col, title, **style):
            """Top-10 bar chart, or a placeholder when no finite value is available."""
            if df.empty:
                return placeholder(title)
            top_rows = _finite_top_n(df, value_col)
            if top_rows.empty:
                return placeholder(title)
            return _ranked_bar_chart(top_rows, value_col, label_col, title=title, **style)

        def as_int(v):
            return f'{int(v)}'

        def as_money(v):
            return f'{v:,.2f}'

        def as_decimal(v):
            return f'{v:.2f}'

        def as_percent(v):
            return f'{v:.1f}%'

        # Provider Charts
        provider_charts = [
            ranked_chart(mc_df, 'No. of Visit', 'Provider', f'Top 10 Providers by Total Visits ({year})',
                         value_label='Total Visits', category_label='Provider', palette='Blues_r',
                         horizontal=True, label_offset=0.5, label_format=as_int),
            ranked_chart(mc_df, 'Total MC Given', 'Provider', f'Top 10 Providers by Total MC Given ({year})',
                         value_label='Total MC (Days)', category_label='Provider', palette='Greens_r',
                         horizontal=True, label_offset=0.5, label_format=as_int),
        ]

        if show_mc_pct:
            pct_title = f'Top 20 Providers by % MC Given ({year})'
            if mc_df.empty:
                provider_charts.append(placeholder(pct_title))
            else:
                # Candidates are the providers that lead on visits or on MC count.
                top_visits_provs = set(mc_df.sort_values('No. of Visit', ascending=False).head(10)['Provider'])
                top_mc_provs = set(mc_df.sort_values('Total MC Given', ascending=False).head(10)['Provider'])
                candidates = mc_df[mc_df['Provider'].isin(top_visits_provs | top_mc_provs)]
                top_prov_mc_pct = _finite_top_n(candidates, '% MC Given', n=20, ascending=(mc_sort_order == "asc"))
                if top_prov_mc_pct.empty:
                    provider_charts.append(placeholder(pct_title))
                else:
                    sort_label = "Ascending" if mc_sort_order == "asc" else "Descending"
                    # Axis-label and tick font sizes come from the rcParams set above (14 / 11).
                    provider_charts.append(_ranked_bar_chart(
                        top_prov_mc_pct, '% MC Given', 'Provider', title=f'{pct_title} - Sorted {sort_label}',
                        value_label='% MC Given', category_label='Provider', palette='Purples_r',
                        horizontal=False, label_offset=1, label_format=as_percent, figsize=(18, 9)))
        else:
            provider_charts.append(None)

        provider_charts.append(
            ranked_chart(claim_df, 'Total Claim', 'Provider Name', f'Top 10 Providers by Total Claim ({year})',
                         value_label='Total Claim ($)', category_label='Provider', palette='Oranges_r',
                         horizontal=True, label_offset=0.5, label_format=as_money))
        provider_charts.append(
            ranked_chart(claim_df, 'Avg Claim per Visit', 'Provider Name', f'Top 10 Providers by Avg Claim per Visit ({year})',
                         value_label='Avg Claim per Visit ($)', category_label='Provider', palette='Reds_r',
                         horizontal=False, label_offset=0.5, label_format=as_decimal))

        # Employee Charts: (value column, title, value-axis label, palette, horizontal, offset, formatter)
        employee_specs = [
            ('Total Visit', f'Top 10 Employees by Total Visits ({year})', 'Total Visits', 'Blues_r', True, 0.2, as_int),
            ('Total Claim (Combined)', f'Top 10 Employees by Total Claim ({year})', 'Total Claim ($)', 'Oranges_r', True, 1, as_money),
            ('Avg Claim per Visit', f'Top 10 Employees by Avg Claim per Visit ({year})', 'Avg Claim per Visit ($)', 'Reds_r', False, 0.5, as_decimal),
            ('Total MC (Days)', f'Top 10 Employees by Total MC ({year})', 'Total MC (Days)', 'Greens_r', True, 0.2, as_int),
            ('Avg MC per Visit', f'Top 10 Employees by Avg MC per Visit ({year})', 'Avg MC per Visit (Days)', 'Purples_r', False, 0.05, as_decimal),
        ]
        employee_charts = [
            ranked_chart(patient_df_year, value_col, 'Employee Name', title,
                         value_label=value_label, category_label='Employee', palette=palette,
                         horizontal=horizontal, label_offset=offset, label_format=formatter)
            for value_col, title, value_label, palette, horizontal, offset, formatter in employee_specs
        ]

        pie_title = f'Claim Distribution by Division ({year})'
        if patient_df_year.empty:
            division_claims = pd.Series(dtype=float)
        else:
            division_claims = patient_df_year.groupby('Division/Department')['Total Claim (Combined)'].sum()
        if division_claims.empty or not (division_claims > 0).any():
            # A pie chart needs at least one positive share.
            employee_charts.append(placeholder(pie_title))
        else:
            fig = plt.figure(figsize=(10, 6))
            plt.pie(division_claims, labels=division_claims.index, autopct='%1.1f%%', colors=sns.color_palette('muted'),
                    startangle=90, textprops={'fontsize': 11, 'color': '#333333'}, wedgeprops={'edgecolor': 'black', 'linewidth': 0.5})
            plt.title(pie_title, pad=15)
            plt.tight_layout()
            employee_charts.append(fig)
            plt.close(fig)

        provider_images = [fig_to_image(fig) if fig is not None else None for fig in provider_charts]
        employee_images = [fig_to_image(fig) for fig in employee_charts]

        logger.info(f"Generated {len(provider_charts)} provider charts and {len(employee_charts)} employee charts for year {year}")
        return provider_images, employee_images, provider_charts, employee_charts
    except Exception as e:
        logger.error(f"Error generating charts: {str(e)}\nTrace: {traceback.format_exc()}")
        return [None] * 5, [None] * 6, [], []

def _month_display_name(month):
    """Return the English month name for a month number such as '03'.

    Falls back to the raw value (as a string) when it cannot be parsed, so
    code that builds fallback titles never raises on a bad month.
    """
    try:
        return datetime.strptime(str(month), '%m').strftime('%B')
    except ValueError:
        return str(month)


def _monthly_employee_chart_titles(month_name, year):
    """Return the six monthly employee chart titles, in dashboard order."""
    return [
        f'Top 10 Employees by Total Visits ({month_name} {year})',
        f'Top 10 Employees by Total Claim ({month_name} {year})',
        f'Top 10 Employees by Avg Claim per Visit ({month_name} {year})',
        f'Top 10 Employees by Total MC ({month_name} {year})',
        f'Top 10 Employees by Avg MC per Visit ({month_name} {year})',
        f'Claim Distribution by Division ({month_name} {year})'
    ]


def _monthly_placeholder_charts(month, year):
    """Build placeholder figures and images for all six monthly employee charts."""
    titles = _monthly_employee_chart_titles(_month_display_name(month), year)
    employee_charts = [create_placeholder_chart(title) for title in titles]
    employee_images = [fig_to_image(fig) for fig in employee_charts]
    return employee_images, employee_charts


def _employee_top10_bar_chart(df, column, title, value_label, palette, horizontal, label_offset, label_format):
    """Draw a top-10 employee bar chart for `column` and return the closed figure.

    `horizontal` puts employees on the y axis; otherwise employees are on the
    x axis with rotated tick labels. Each bar is annotated with
    `label_format(value)` placed `label_offset` beyond the bar end.
    """
    fig = plt.figure(figsize=(12, 7))
    try:
        top = df.sort_values(column, ascending=False).head(10)
        if horizontal:
            ax = sns.barplot(data=top, x=column, y='Employee Name', hue='Employee Name', palette=palette,
                             legend=False, edgecolor='black', linewidth=0.5)
            ax.set_xlabel(value_label)
            ax.set_ylabel('Employee')
        else:
            ax = sns.barplot(data=top, x='Employee Name', y=column, hue='Employee Name', palette=palette,
                             legend=False, edgecolor='black', linewidth=0.5)
            ax.set_ylabel(value_label)
            ax.set_xlabel('Employee')
            plt.xticks(rotation=45, ha='right')
        ax.set_title(title, pad=15)
        for i, v in enumerate(top[column]):
            if horizontal:
                ax.text(v + label_offset, i, label_format(v), va='center', fontsize=10, color='#333333')
            else:
                ax.text(i, v + label_offset, label_format(v), ha='center', fontsize=10, color='#333333')
        plt.tight_layout()
    finally:
        # Always deregister the figure from pyplot, even if plotting failed,
        # so a failed chart does not leak an open figure.
        plt.close(fig)
    return fig


def _division_claim_pie_chart(df, title):
    """Draw the per-division share of 'Total Claim (Combined)' and return the closed figure."""
    fig = plt.figure(figsize=(10, 7))
    try:
        division_claims = df.groupby('Division/Department')['Total Claim (Combined)'].sum()
        plt.pie(division_claims, labels=division_claims.index, autopct='%1.1f%%', colors=sns.color_palette('muted'),
                startangle=90, textprops={'fontsize': 11, 'color': '#333333'}, wedgeprops={'edgecolor': 'black', 'linewidth': 0.5})
        plt.title(title, pad=15)
        plt.tight_layout()
    finally:
        plt.close(fig)
    return fig


def generate_monthly_patient_charts(patient_data_by_year_month, year, month):
    """Build the six employee charts for a single month.

    Args:
        patient_data_by_year_month: Mapping of int year -> mapping of month key
            -> DataFrame with the columns plotted below ('Employee Name',
            'Total Visit', 'Total Claim (Combined)', 'Avg Claim per Visit',
            'Total MC (Days)', 'Avg MC per Visit', 'Division/Department').
        year: Year as a string or int (converted with int()).
        month: Month number string, e.g. '03'.

    Returns:
        (employee_images, employee_charts): six image arrays from
        fig_to_image and the six matching figures. Placeholder charts are
        returned when the month has no data, is empty, or charting fails.

    NOTE(review): the month key is looked up as 'YYYY-MM' first and then as
    the bare month string. The previous code tested membership with the bare
    month but fetched with 'YYYY-MM', so it could never reach real data;
    confirm which key form scrape_data produces.
    """
    try:
        logger.info(f"Generating monthly patient charts for {year}-{month}")
        year_int = int(year)

        year_data = None
        if patient_data_by_year_month and year_int in patient_data_by_year_month:
            year_data = patient_data_by_year_month[year_int]

        patient_df_month = None
        if year_data is not None:
            for key in (f"{year}-{month}", month):
                if key in year_data:
                    patient_df_month = year_data[key]
                    break

        if patient_df_month is None:
            logger.warning(f"No patient data available for {year}-{month}")
            return _monthly_placeholder_charts(month, year)

        logger.info(f"Patient data size for {year}-{month}: {len(patient_df_month)} rows")

        month_name = _month_display_name(month)
        sns.set(style="whitegrid", palette="muted")
        plt.rcParams.update({
            'font.family': 'Arial', 'font.size': 12, 'axes.titlesize': 16, 'axes.labelsize': 14, 'xtick.labelsize': 11, 'ytick.labelsize': 11,
            'axes.titleweight': 'bold', 'axes.linewidth': 1.5, 'grid.linestyle': ':', 'grid.alpha': 0.5, 'figure.facecolor': '#f5f6f5',
            'axes.facecolor': '#ffffff', 'axes.edgecolor': '#333333', 'axes.labelcolor': '#333333', 'text.color': '#333333'
        })

        titles = _monthly_employee_chart_titles(month_name, year)

        # (column, value-axis label, palette, horizontal?, label offset, label formatter)
        # in the same order as the first five titles.
        bar_specs = [
            ('Total Visit', 'Total Visits', 'Blues_r', True, 0.2, lambda v: f'{int(v)}'),
            ('Total Claim (Combined)', 'Total Claim ($)', 'Oranges_r', True, 1, lambda v: f'{v:,.2f}'),
            ('Avg Claim per Visit', 'Avg Claim per Visit ($)', 'Reds_r', False, 0.5, lambda v: f'{v:.2f}'),
            ('Total MC (Days)', 'Total MC (Days)', 'Greens_r', True, 0.2, lambda v: f'{int(v)}'),
            ('Avg MC per Visit', 'Avg MC per Visit (Days)', 'Purples_r', False, 0.05, lambda v: f'{v:.2f}'),
        ]

        if patient_df_month.empty:
            employee_charts = [create_placeholder_chart(title) for title in titles]
        else:
            employee_charts = [
                _employee_top10_bar_chart(patient_df_month, column, title, value_label, palette,
                                          horizontal, label_offset, label_format)
                for (column, value_label, palette, horizontal, label_offset, label_format), title
                in zip(bar_specs, titles)
            ]
            employee_charts.append(_division_claim_pie_chart(patient_df_month, titles[-1]))

        employee_images = [fig_to_image(fig) for fig in employee_charts]

        logger.info(f"Generated {len(employee_charts)} monthly patient charts for {month_name} {year}")
        return employee_images, employee_charts
    except Exception as e:
        logger.error(f"Error generating monthly patient charts: {str(e)}\nTrace: {traceback.format_exc()}")
        # The fallback must not re-raise on a malformed month, so the title
        # helper tolerates unparseable values.
        return _monthly_placeholder_charts(month, year)

def fig_to_image(fig):
    """Render a figure's canvas into an RGBA pixel array.

    Returns a uint8 NumPy array shaped (height, width, 4) built from the
    canvas's RGBA buffer. Returns None when `fig` is None, or when drawing or
    conversion fails (the failure is logged).
    """
    if fig is None:
        return None
    try:
        canvas = fig.canvas
        canvas.draw()
        pixels = np.frombuffer(canvas.buffer_rgba(), dtype=np.uint8)
        # The canvas reports (width, height); image arrays are row-major.
        width, height = canvas.get_width_height()
        return pixels.reshape((height, width, 4))
    except Exception as e:
        logger.error(f"Error converting figure to image: {str(e)}")
        return None

def _add_chart_grid_slide(prs, slide_layout, group_label, title, charts, chart_titles):
    """Add one slide titled '<group_label> Insights (<title>)' with charts in a 3-column grid.

    None entries in `charts` are skipped, but each remaining chart keeps the
    title from its ORIGINAL position in `chart_titles`, so log messages stay
    correct when some charts are missing.
    """
    slide = prs.slides.add_slide(slide_layout)
    slide.shapes.title.text = f"{group_label} Insights ({title})"

    placed = [(position, fig) for position, fig in enumerate(charts) if fig is not None]
    if not placed:
        logger.warning(f"No valid {group_label.lower()} charts to export")
        return

    charts_per_row = 3
    rows = (len(placed) + charts_per_row - 1) // charts_per_row
    chart_width = Inches(3.8)
    # A single row gets taller pictures; multiple rows are shortened to fit.
    chart_height = Inches(2.5) if rows > 1 else Inches(5)
    top_start = Inches(1.5)

    for slot, (position, fig) in enumerate(placed):
        row, col = divmod(slot, charts_per_row)
        left = Inches(0.5 + col * 4.5)
        top = top_start + row * (chart_height + Inches(0.2))
        # Guard against more charts than known titles instead of raising IndexError.
        chart_title = chart_titles[position] if position < len(chart_titles) else f"Chart {position + 1}"
        img_stream = BytesIO()
        fig.savefig(img_stream, format='png', bbox_inches='tight', dpi=100)
        img_stream.seek(0)
        if img_stream.getvalue():
            slide.shapes.add_picture(img_stream, left, top, width=chart_width, height=chart_height)
            logger.info(f"Added chart {slot+1}: '{chart_title}' to {group_label} slide")
        else:
            logger.warning(f"Failed to save chart {slot+1}: '{chart_title}' - empty image stream")


def charts_to_pptx(provider_charts, employee_charts, title):
    """Export provider and/or employee figures to a temporary .pptx file.

    Args:
        provider_charts: List of figures (entries may be None); one
            "Provider Insights" slide is added when the list is non-empty.
        employee_charts: List of figures (entries may be None); one
            "Employee Insights" slide is added when the list is non-empty.
        title: Period text inserted into slide and chart titles.

    Returns:
        Path of the saved temporary .pptx file, or None if generation failed
        (the error is logged). The file is not deleted automatically.
    """
    try:
        logger.info(f"Generating PPTX for {title}")
        prs = Presentation()
        slide_layout = prs.slide_layouts[5]
        prs.slide_width = Inches(13.33)
        prs.slide_height = Inches(7.5)

        provider_titles = [
            f"Top 10 Providers by Total Visits ({title})",
            f"Top 10 Providers by Total MC Given ({title})",
            f"Top 20 Providers by % MC Given ({title})",
            f"Top 10 Providers by Total Claim ({title})",
            f"Top 10 Providers by Avg Claim per Visit ({title})"
        ]

        employee_titles = [
            f"Top 10 Employees by Total Visits ({title})",
            f"Top 10 Employees by Total Claim ({title})",
            f"Top 10 Employees by Avg Claim per Visit ({title})",
            f"Top 10 Employees by Total MC ({title})",
            f"Top 10 Employees by Avg MC per Visit ({title})",
            f"Claim Distribution by Division ({title})"
        ]

        if provider_charts:
            _add_chart_grid_slide(prs, slide_layout, "Provider", title, provider_charts, provider_titles)

        if employee_charts:
            _add_chart_grid_slide(prs, slide_layout, "Employee", title, employee_charts, employee_titles)

        # Reserve a unique path, then save after the handle is closed so the
        # file is never opened twice at once.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.pptx') as tmp_file:
            tmp_file_path = tmp_file.name
        prs.save(tmp_file_path)

        logger.info(f"PPTX file saved to temporary path: {tmp_file_path}")
        return tmp_file_path
    except Exception as e:
        logger.error(f"Error generating PPTX: {str(e)}\nTrace: {traceback.format_exc()}")
        return None

# --- Gradio Interface ---
with gr.Blocks(title="Claims Analysis Dashboard", css="""
    body { background-color: #f5f6f5; }
    h1, h2 { color: #333333; font-family: Arial; }
""") as demo:
    gr.Markdown("# Claims Analysis Dashboard (2024 - Present)")
    
    # Inputs passed to scrape_and_store when the Submit button is clicked.
    with gr.Row():
        url_input = gr.Textbox(label="Website URL", placeholder="Enter URL here", lines=1)
        user_id_input = gr.Textbox(label="User ID", placeholder="Enter User ID", lines=1)
        password_input = gr.Textbox(label="Password", type="password", placeholder="Enter Password", lines=1)
    scrape_btn = gr.Button("Submit", variant="primary")
    
    # Chart filters. The year and month dropdowns start non-interactive;
    # scrape_and_store enables them (and replaces the year choices) after a
    # successful scrape.
    with gr.Row():
        year_dropdown = gr.Dropdown(
            label="Select Year to View Data",
            choices=["2024", "2025"],
            value="2024",
            allow_custom_value=False,
            interactive=False
        )
        month_dropdown = gr.Dropdown(
            label="Select Month (Optional)",
            choices=["Yearly"] + [f"{month:02d}" for month in range(1, 13)],
            value="Yearly",
            allow_custom_value=False,
            interactive=False
        )
        show_mc_pct_checkbox = gr.Checkbox(label="Show % MC Given Chart", value=True)
        mc_sort_dropdown = gr.Dropdown(
            label="Sort % MC Given",
            choices=["desc", "asc"],
            value="desc",
            allow_custom_value=False
        )
    
    status_output = gr.Textbox(label="Status", lines=5, interactive=False)
    # Per-session holders for the three datasets returned by scrape_and_store;
    # they are fed back into update_dashboard and the download handlers.
    patient_state = gr.State()
    claim_state = gr.State()
    mc_state = gr.State()

    with gr.Tabs():
        # Five provider chart slots, filled from provider_images[0..4].
        with gr.TabItem("Provider Insights"):
            gr.Markdown("## Provider Insights Dashboard (Yearly)")
            with gr.Row():
                prov_chart1 = gr.Image(label="Total Visits by Providers", interactive=False)
                prov_chart2 = gr.Image(label="Total MC by Providers", interactive=False)
            with gr.Row():
                prov_chart3 = gr.Image(label="% MC Given by Providers", interactive=False, visible=True)
            with gr.Row():
                prov_chart4 = gr.Image(label="Total Claim by Providers", interactive=False)
                prov_chart5 = gr.Image(label="Average Claim per Visit by Providers", interactive=False)
            download_btn_prov = gr.Button("Download Provider Charts as PPTX")

        # Six employee chart slots, filled from employee_images[0..5].
        with gr.TabItem("Employee Insights"):
            gr.Markdown("## Employee Insights Dashboard")
            with gr.Row():
                emp_chart1 = gr.Image(label="Total Visits by Employees", interactive=False)
                emp_chart2 = gr.Image(label="Total Claim by Employees", interactive=False)
            with gr.Row():
                emp_chart3 = gr.Image(label="Average Claim per Visit by Employees", interactive=False)
                emp_chart4 = gr.Image(label="Total MC by Employees", interactive=False)
            with gr.Row():
                emp_chart5 = gr.Image(label="Average MC per Visit by Employees", interactive=False)
                emp_chart6 = gr.Image(label="Claim Distribution by Division", interactive=False)
            download_btn_emp = gr.Button("Download Employee Charts as PPTX")

    def scrape_and_store(url, user_id, password, show_mc_pct, mc_sort_order):
        """Scrape the data, cache it in state, and render the initial yearly charts.

        Returns a 17-tuple matching the Submit button's outputs: status text,
        the three datasets (None on failure), updates for the year and month
        dropdowns, then five provider images and six employee images.
        """
        month_choices = ["Yearly"] + [f"{month:02d}" for month in range(1, 13)]
        no_charts = (None,) * 11
        try:
            logger.info("Starting scrape_and_store")
            patient_data_by_year_month, claim_data_by_year, mc_data_by_year, status = scrape_data(url, user_id, password)

            datasets = (patient_data_by_year_month, claim_data_by_year, mc_data_by_year)
            if any(dataset is None for dataset in datasets):
                logger.warning(f"Scraping failed: {status}")
                return (
                    status, None, None, None,
                    gr.update(choices=["2024"], value="2024", interactive=False),
                    gr.update(choices=month_choices, value="Yearly", interactive=False),
                    *no_charts
                )

            # Offer every year that appears in at least one dataset.
            available_years = sorted(set().union(
                patient_data_by_year_month.keys(), claim_data_by_year.keys(), mc_data_by_year.keys()))
            year_choices = [str(year) for year in available_years]
            default_year = year_choices[0] if year_choices else "2024"

            logger.info(f"Available years: {year_choices}")
            provider_images, employee_images, provider_figs, employee_figs = generate_dashboard_charts(
                patient_data_by_year_month, claim_data_by_year, mc_data_by_year, default_year, show_mc_pct, mc_sort_order)
            logger.info("Scraping and chart generation completed successfully")

            provider_slots = tuple(provider_images[slot] for slot in range(5))
            employee_slots = tuple(employee_images[slot] for slot in range(6))
            return (
                status, patient_data_by_year_month, claim_data_by_year, mc_data_by_year,
                gr.update(choices=year_choices, value=default_year, interactive=True),
                gr.update(choices=month_choices, value="Yearly", interactive=True),
                *provider_slots,
                *employee_slots
            )
        except Exception as e:
            error_msg = f"Error in scrape_and_store: {str(e)}\nTrace: {traceback.format_exc()}"
            logger.error(error_msg)
            return (error_msg, None, None, None, gr.update(), gr.update(), *no_charts)

    def update_dashboard(year, month, patient_data_by_year_month, claim_data_by_year, mc_data_by_year, show_mc_pct, mc_sort_order):
        """Re-render all eleven chart images for the selected year/month.

        Provider charts always come from the yearly generator. Employee charts
        come from the yearly generator when month is "Yearly", otherwise from
        the monthly generator. Returns five provider images followed by six
        employee images, or eleven Nones when data is missing or an error occurs.
        """
        try:
            logger.info(f"Updating dashboard for {year}, {month}")
            have_all_data = patient_data_by_year_month and claim_data_by_year and mc_data_by_year
            if not have_all_data:
                logger.warning("No data available for dashboard update")
                return [None] * 11

            provider_images, employee_images, _, _ = generate_dashboard_charts(
                patient_data_by_year_month, claim_data_by_year, mc_data_by_year, year, show_mc_pct, mc_sort_order)
            if month != "Yearly":
                # A specific month replaces only the employee charts.
                employee_images, _ = generate_monthly_patient_charts(patient_data_by_year_month, year, month)

            if employee_images is None or len(employee_images) < 6:
                logger.warning("Employee images incomplete, padding with None")
                employee_images = employee_images or [None] * 6
                employee_images = employee_images + [None] * (6 - len(employee_images))

            if provider_images is None or len(provider_images) < 5:
                logger.warning("Provider images incomplete, padding with None")
                provider_images = provider_images or [None] * 5
                provider_images = provider_images + [None] * (5 - len(provider_images))

            return (*provider_images[:5], *employee_images[:6])
        except Exception as e:
            logger.error(f"Error updating dashboard: {str(e)}\nTrace: {traceback.format_exc()}")
            return [None] * 11

    def download_provider_pptx(year, month, patient_data_by_year_month, claim_data_by_year, mc_data_by_year, show_mc_pct, mc_sort_order):
        """Export the provider charts for the selected year to a PPTX download.

        The charts are produced by the yearly generator; `month` only affects
        the slide title text and the download label. Returns a gr.File for the
        generated file, or None when generation fails.
        """
        try:
            logger.info(f"Downloading provider PPTX for {year}, {month}")
            _, _, provider_figs, _ = generate_dashboard_charts(
                patient_data_by_year_month, claim_data_by_year, mc_data_by_year, year, show_mc_pct, mc_sort_order)

            if month == "Yearly":
                period = year
                label = f"Provider_Charts_{year}.pptx"
            else:
                month_name = datetime.strptime(month, '%m').strftime('%B')
                period = f"{month_name} {year}"
                label = f"Provider_Charts_{month_name}_{year}.pptx"

            pptx_path = charts_to_pptx(provider_figs, [], period)
            if not pptx_path:
                logger.warning("Failed to generate provider PPTX")
                return None
            return gr.File(value=pptx_path, label=label)
        except Exception as e:
            logger.error(f"Error downloading provider PPTX: {str(e)}\nTrace: {traceback.format_exc()}")
            return None

    def download_employee_pptx(year, month, patient_data_by_year_month, claim_data_by_year, mc_data_by_year, show_mc_pct, mc_sort_order):
        """Export the employee charts for the selected period to a PPTX download.

        Uses the yearly generator when month is "Yearly" and the monthly
        generator otherwise. Returns a gr.File for the generated file, or None
        when generation fails.
        """
        try:
            logger.info(f"Downloading employee PPTX for {year}, {month}")
            yearly = month == "Yearly"
            if yearly:
                _, _, _, employee_figs = generate_dashboard_charts(
                    patient_data_by_year_month, claim_data_by_year, mc_data_by_year, year, show_mc_pct, mc_sort_order)
                period = year
                label = f"Employee_Charts_{year}.pptx"
            else:
                _, employee_figs = generate_monthly_patient_charts(patient_data_by_year_month, year, month)
                month_name = datetime.strptime(month, '%m').strftime('%B')
                period = f"{month_name} {year}"
                label = f"Employee_Charts_{month_name}_{year}.pptx"

            pptx_path = charts_to_pptx([], employee_figs, period)
            if not pptx_path:
                logger.warning("Failed to generate employee PPTX")
                return None
            return gr.File(value=pptx_path, label=label)
        except Exception as e:
            logger.error(f"Error downloading employee PPTX: {str(e)}\nTrace: {traceback.format_exc()}")
            return None

    # Component groups shared by the event registrations below.
    provider_chart_slots = [prov_chart1, prov_chart2, prov_chart3, prov_chart4, prov_chart5]
    employee_chart_slots = [emp_chart1, emp_chart2, emp_chart3, emp_chart4, emp_chart5, emp_chart6]

    # Submit: scrape, store the datasets in state, enable the dropdowns and
    # draw the initial charts.
    scrape_btn.click(
        fn=scrape_and_store,
        inputs=[url_input, user_id_input, password_input, show_mc_pct_checkbox, mc_sort_dropdown],
        outputs=[status_output, patient_state, claim_state, mc_state, year_dropdown, month_dropdown]
        + provider_chart_slots + employee_chart_slots
    )

    # Any filter change redraws all eleven charts from the cached datasets.
    for filter_control in (year_dropdown, month_dropdown, show_mc_pct_checkbox, mc_sort_dropdown):
        filter_control.change(
            fn=update_dashboard,
            inputs=[year_dropdown, month_dropdown, patient_state, claim_state, mc_state, show_mc_pct_checkbox, mc_sort_dropdown],
            outputs=provider_chart_slots + employee_chart_slots
        )

    # Each download button writes to its own newly created File component.
    for download_button, download_handler in (
        (download_btn_prov, download_provider_pptx),
        (download_btn_emp, download_employee_pptx),
    ):
        download_button.click(
            fn=download_handler,
            inputs=[year_dropdown, month_dropdown, patient_state, claim_state, mc_state, show_mc_pct_checkbox, mc_sort_dropdown],
            outputs=gr.File()
        )

# Start the Gradio server. share=True additionally requests a public
# *.gradio.live URL (the startup log reports that the link expires in 72 hours).
# NOTE(review): the UI accepts a user ID and password, so confirm that exposing
# it on a public share link is intended.
demo.launch(share=True)

* Running on local URL:  http://127.0.0.1:7865


2025-03-10 16:34:26,279 - INFO - HTTP Request: GET http://127.0.0.1:7865/gradio_api/startup-events "HTTP/1.1 200 OK"
2025-03-10 16:34:26,358 - INFO - HTTP Request: HEAD http://127.0.0.1:7865/ "HTTP/1.1 200 OK"
2025-03-10 16:34:26,374 - INFO - HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
2025-03-10 16:34:28,293 - INFO - HTTP Request: GET https://api.gradio.app/v3/tunnel-request "HTTP/1.1 200 OK"


* Running on public URL: https://91b0a417d90b89fcfb.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


2025-03-10 16:34:31,768 - INFO - HTTP Request: HEAD https://91b0a417d90b89fcfb.gradio.live "HTTP/1.1 200 OK"




2025-03-10 16:34:57,467 - INFO - Starting scrape_and_store
2025-03-10 16:35:11,601 - INFO - Get LATEST edgedriver version for Edge 134.0.3124
2025-03-10 16:35:11,774 - INFO - Get LATEST edgedriver version for Edge 134.0.3124
2025-03-10 16:35:11,902 - INFO - Driver [C:\Users\DELL-INTERN-HR\.wdm\drivers\edgedriver\win64\134.0.3124.51\msedgedriver.exe] found in cache
2025-03-10 16:35:15,080 - INFO - Connecting to http://119.8.163.172:8080/ClaimEXMVR/Login/index.jsp
2025-03-10 16:35:16,986 - INFO - Clicking login image
2025-03-10 16:35:18,869 - INFO - Entering credentials
2025-03-10 16:35:19,967 - INFO - Clicking Continue button
2025-03-10 16:35:40,253 - ERROR - Timeout waiting for element ('xpath', "//button[text()='Continue']"): Message: 

2025-03-10 16:35:40,347 - ERROR - Timeout Error: Message: 

Trace: Traceback (most recent call last):
  File "C:\Users\DELL-INTERN-HR\AppData\Local\Temp\ipykernel_11864\1479723754.py", line 307, in scrape_data
    continue_button = wait_for_element(dri