# In [3]:
import gradio as gr
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from selenium import webdriver
from selenium.webdriver.edge.service import Service
from selenium.webdriver.edge.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait, Select
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.microsoft import EdgeChromiumDriverManager
from datetime import datetime
import time
import numpy as np
import calendar
from selenium.common.exceptions import StaleElementReferenceException, TimeoutException, NoSuchElementException

# --- Scraping Functions ---
def wait_for_element(driver, locator):
    """Wait up to 10 seconds for ``locator`` to become clickable and return the element.

    Args:
        driver: Selenium WebDriver (or any object accepted by ``WebDriverWait``).
        locator: ``(By.<strategy>, value)`` tuple identifying the element.

    Returns:
        Whatever the ``element_to_be_clickable`` condition yields once it is met.
    """
    waiter = WebDriverWait(driver, 10)
    clickable = EC.element_to_be_clickable(locator)
    return waiter.until(clickable)

def select_date_month_day(driver, date_str, date_input_id):
    """Pick the month and day of ``date_str`` in the picker opened from ``date_input_id``.

    Only the month and the day are chosen; the year shown by the picker is
    left untouched (``select_date`` also sets the year).

    Args:
        driver: Selenium WebDriver on a page containing the date input.
        date_str: Date in ``YYYY-MM-DD`` format.
        date_input_id: ``id`` attribute of the input that opens the picker.

    Raises:
        ValueError: If ``date_str`` does not match ``YYYY-MM-DD``.
    """
    date_to_select = datetime.strptime(date_str, '%Y-%m-%d')

    date_input = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.ID, date_input_id)))
    date_input.click()

    month_select = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.CLASS_NAME, 'ui-datepicker-month')))
    # The leading "." keeps the search inside the month <select>; a bare "//"
    # would match the first such <option> anywhere in the document.
    # Option values are zero-based month indices, hence the "- 1".
    month_option = month_select.find_element(
        By.XPATH, f".//option[@value='{date_to_select.month - 1}']")
    month_option.click()

    day = date_to_select.day
    day_element = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.XPATH, f"//td[@data-handler='selectDay']/a[text()='{day}']")))
    day_element.click()
    # Fixed pause after picking the day, kept from the original flow.
    time.sleep(5)

def select_date(driver, date_str, date_input_id):
    """Pick the month, year and day of ``date_str`` in the picker opened from ``date_input_id``.

    Args:
        driver: Selenium WebDriver on a page containing the date input.
        date_str: Date in ``YYYY-MM-DD`` format.
        date_input_id: ``id`` attribute of the input that opens the picker.

    Raises:
        ValueError: If ``date_str`` does not match ``YYYY-MM-DD``.
    """
    date_to_select = datetime.strptime(date_str, '%Y-%m-%d')

    date_input = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.ID, date_input_id)))
    date_input.click()
    time.sleep(2)

    month_select = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.CLASS_NAME, 'ui-datepicker-month')))
    # ".//" scopes the lookup to this <select>; a bare "//" would search the
    # whole document and could hit an <option> of an unrelated dropdown.
    # Option values are zero-based month indices, hence the "- 1".
    month_option = month_select.find_element(
        By.XPATH, f".//option[@value='{date_to_select.month - 1}']")
    time.sleep(2)
    month_option.click()

    # The year <select> is looked up only after the month click, as in the
    # original sequence.
    year_select = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.CLASS_NAME, 'ui-datepicker-year')))
    year_option = year_select.find_element(
        By.XPATH, f".//option[@value='{date_to_select.year}']")
    year_option.click()

    day = date_to_select.day
    day_element = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.XPATH, f"//td[@data-handler='selectDay']/a[text()='{day}']")))
    day_element.click()
    # Fixed pause after picking the day, kept from the original flow.
    time.sleep(5)

def extract_grid_data_clm_summary(driver):
    """Read every row of the ``jqgridClmSummbyProv`` grid across all of its pages.

    Args:
        driver: Selenium WebDriver already showing the grid's search results.

    Returns:
        A list of dicts with the keys ``'Provider Name'``, ``'No of Visits'``,
        ``'Total Claim'`` and ``'Total MC (Days)'``; every value is the raw
        cell text. The list is empty when the pager's page count cannot be read.
    """
    data = []
    cell = "td[aria-describedby='jqgridClmSummbyProv_{}']"

    try:
        total_pages_element = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.ID, "sp_1_pjqgridClmSummbyProv")))
        total_pages = int(total_pages_element.text.strip())
    except Exception as e:
        # Best effort: without a page count there is nothing to iterate over.
        print(f"Could not read claim summary page count: {e}")
        return data

    for current_page in range(1, total_pages + 1):
        time.sleep(3)
        driver.execute_script("window.scrollTo(0, 0);")
        grid = WebDriverWait(driver, 10).until(
            EC.visibility_of_element_located((By.ID, "jqgridClmSummbyProv")))
        rows = WebDriverWait(grid, 10).until(
            EC.presence_of_all_elements_located((By.CSS_SELECTOR, "tr.jqgrow")))

        for row in rows:
            try:
                record = {
                    'Provider Name': row.find_element(By.CSS_SELECTOR, cell.format('STR_ProvName')).text,
                    'No of Visits': row.find_element(By.CSS_SELECTOR, cell.format('STR_NoOfVisit')).text,
                    'Total Claim': row.find_element(By.CSS_SELECTOR, cell.format('STR_ClmAmt')).text,
                }
                try:
                    record['Total MC (Days)'] = row.find_element(
                        By.CSS_SELECTOR, cell.format('STR_TotalMC')).text
                except NoSuchElementException:
                    # The MC cell is treated as optional; default to zero days.
                    record['Total MC (Days)'] = '0'
                data.append(record)
            except Exception as e:
                print(f"Error extracting row: {e}")
                continue

        if current_page < total_pages:
            try:
                next_button_div = WebDriverWait(driver, 5).until(
                    EC.element_to_be_clickable((By.CSS_SELECTOR, "div.btn.btn-sm.btn-default span.fa.fa-forward")))
                driver.execute_script("arguments[0].scrollIntoView(true);", next_button_div)
                next_button_div.click()
                # The old first row going stale signals that the grid reloaded.
                WebDriverWait(driver, 10).until(EC.staleness_of(rows[0]))
            except Exception:
                # Paging failed; keep what has been collected so far.
                break
    return data

def extract_grid_data_patient_analysis(driver):
    """Read every row of the ``jqgridCorpMcAnalysis`` grid, following the pager to the end.

    Paging stops when the "forward" button's parent ``div`` carries a
    ``disabled`` class, or when any step of moving to the next page fails.

    Args:
        driver: Selenium WebDriver already showing the grid's search results.

    Returns:
        A list of dicts, one per grid row, holding the raw cell text under the
        keys ``'Employee Name'``, ``'Employee No'``, ``'Division/Department'``,
        ``'Total Visit'``, ``'Total MC (Days)'``, ``'Total Claim (Own)'`` and
        ``'Total Claim (Dep)'``.
    """
    # Output key -> suffix of the cell's aria-describedby attribute.
    columns = {
        'Employee Name': 'MEM_NAME',
        'Employee No': 'MEM_EMPID',
        'Division/Department': 'MEM_EMPDIVISION',
        'Total Visit': 'totalVisit',
        'Total MC (Days)': 'totalMC',
        'Total Claim (Own)': 'totalClaim_Own',
        'Total Claim (Dep)': 'totalClaim_Dep',
    }
    all_data = []
    while True:
        driver.execute_script("window.scrollTo(0, 0);")
        grid = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.ID, "jqgridCorpMcAnalysis")))
        rows = grid.find_elements(By.CSS_SELECTOR, "tr.jqgrow")

        for row in rows:
            try:
                all_data.append({
                    key: row.find_element(
                        By.CSS_SELECTOR, f"td[aria-describedby='jqgridCorpMcAnalysis_{suffix}']").text
                    for key, suffix in columns.items()
                })
            except Exception as e:
                # A row missing any cell is skipped as a whole.
                print(f"Error extracting row: {e}")
                continue

        try:
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            next_button = WebDriverWait(driver, 5).until(
                EC.element_to_be_clickable((By.CSS_SELECTOR, "div.btn.btn-sm.btn-default span.fa.fa-forward")))
            parent_div = next_button.find_element(By.XPATH, "./parent::div")
            if "disabled" in parent_div.get_attribute("class"):
                break
            driver.execute_script("arguments[0].scrollIntoView(true);", parent_div)
            parent_div.click()
            # The old first row going stale signals that the grid reloaded.
            WebDriverWait(driver, 10).until(EC.staleness_of(rows[0]))
        except Exception:
            # No usable next page (timeout, empty grid, ...): stop paging.
            break
    return all_data

def extract_grid_data_mc(driver):
    """Read every row of the ``jqgrid`` grid across all of its pages.

    Args:
        driver: Selenium WebDriver already showing the grid's search results.

    Returns:
        A list of dicts with the keys ``'Provider'``, ``'Total MC Given'`` and
        ``'No. of Visit'``; every value is the stripped cell text. When the
        pager's page count cannot be read, a single page is assumed.
    """
    data = []
    cell = "td[aria-describedby='jqgrid_{}']"

    try:
        total_pages_element = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.ID, "sp_1_jqgrid")))
        total_pages = int(total_pages_element.text.strip())
    except Exception:
        # Pager unreadable: still try to read the page that is on screen.
        total_pages = 1

    for current_page in range(1, total_pages + 1):
        time.sleep(3)
        driver.execute_script("window.scrollTo(0, 0);")
        grid = WebDriverWait(driver, 10).until(EC.visibility_of_element_located((By.ID, "jqgrid")))
        rows = WebDriverWait(grid, 10).until(
            EC.presence_of_all_elements_located((By.CSS_SELECTOR, "tr.jqgrow")))

        for row in rows:
            try:
                provider_name = row.find_element(By.CSS_SELECTOR, cell.format('STR_ProvName')).text.strip()
                total_mc_given = row.find_element(By.CSS_SELECTOR, cell.format('STR_MC_Given_Count')).text.strip()
                total_visits = row.find_element(By.CSS_SELECTOR, cell.format('STR_VISITCount')).text.strip()
                data.append({'Provider': provider_name, 'Total MC Given': total_mc_given, 'No. of Visit': total_visits})
            except Exception as e:
                # A row missing any cell is skipped as a whole.
                print(f"Error extracting row: {e}")
                continue

        if current_page < total_pages:
            try:
                next_button_div = WebDriverWait(driver, 5).until(
                    EC.element_to_be_clickable((By.CSS_SELECTOR, "div.btn.btn-sm.btn-default span.fa.fa-forward")))
                driver.execute_script("arguments[0].scrollIntoView(true);", next_button_div)
                next_button_div.click()
                # The old first row going stale signals that the grid reloaded.
                WebDriverWait(driver, 10).until(EC.staleness_of(rows[0]))
            except Exception:
                # Paging failed; keep what has been collected so far.
                break
    return data

def get_month_date_ranges(year, end_date=None):
    """Return ``(start, end)`` date strings for each month of ``year`` up to a cutoff.

    Args:
        year: Calendar year to split into months.
        end_date: Cutoff in ``YYYY-MM-DD`` format; defaults to the current date.

    Returns:
        A list of ``('YYYY-MM-DD', 'YYYY-MM-DD')`` tuples, one per month, in
        calendar order. Months that start after the cutoff are omitted, and the
        month containing the cutoff ends on the cutoff itself. A ``year`` that
        lies entirely after the cutoff yields an empty list.

    Raises:
        ValueError: If ``end_date`` does not match ``YYYY-MM-DD``.
    """
    cutoff = datetime.now() if end_date is None else datetime.strptime(end_date, '%Y-%m-%d')
    date_ranges = []
    for month in range(1, 13):
        # Compare (year, month) as a pair so that years after the cutoff year
        # stop immediately instead of producing ranges that end before they start.
        if (year, month) > (cutoff.year, cutoff.month):
            break
        first_day = datetime(year, month, 1)
        last_day = datetime(year, month, calendar.monthrange(year, month)[1])
        last_day = min(last_day, cutoff)
        date_ranges.append((first_day.strftime('%Y-%m-%d'), last_day.strftime('%Y-%m-%d')))
    return date_ranges

def _click_when_ready(driver, locator):
    """Wait up to 10 seconds for ``locator`` to be clickable, then click it."""
    WebDriverWait(driver, 10).until(EC.element_to_be_clickable(locator)).click()


def _portal_login(driver, url, user_id, password):
    """Open ``url``, choose the corporate entry image, sign in and confirm the Continue prompt."""
    driver.get(url)
    _click_when_ready(driver, (By.XPATH, "//img[@src='/ClaimEXMVR/Servlet_LoadImage?SFC=loadImage&imageName=icorporate.png']"))
    user_id_field = WebDriverWait(driver, 10).until(EC.visibility_of_element_located((By.NAME, "txtloginid")))
    user_id_field.send_keys(user_id)
    password_field = WebDriverWait(driver, 10).until(EC.visibility_of_element_located((By.ID, "inputpss")))
    password_field.send_keys(password)
    _click_when_ready(driver, (By.CSS_SELECTOR, "button.btn.btn-primary[type='submit']"))
    _click_when_ready(driver, (By.XPATH, "//button[text()='Continue']"))


def _set_grid_page_size(driver, settle_seconds):
    """Select "100" in the grid's page-size dropdown and pause for ``settle_seconds``."""
    dropdown = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, "select.ui-pg-selbox")))
    driver.execute_script("arguments[0].scrollIntoView(true);", dropdown)
    Select(dropdown).select_by_value("100")
    time.sleep(settle_seconds)


def _coerce_numeric_columns(df, columns):
    """Convert the named text columns of ``df`` to numbers in place (unparseable -> NaN).

    Thousands separators are stripped first so that a cell such as "1,234.50"
    is not silently turned into NaN.
    """
    for col in columns:
        cleaned = df[col].astype(str).str.replace(',', '', regex=False).str.strip()
        df[col] = pd.to_numeric(cleaned, errors='coerce')


def _safe_ratio(numerator, denominator):
    """Divide two Series, yielding NaN (not +/-inf) wherever the denominator is 0."""
    return numerator / denominator.replace(0, np.nan)


def scrape_data(url, user_id, password):
    """Log in to the portal and scrape patient, MC and claim reports for 2024 and 2025.

    Patient Analysis is scraped month by month; "MC by Provider" and "Claim
    Summary by Providers" are scraped once per year. A failure inside one
    month/year is printed and that period is skipped; the remaining periods
    are still attempted.

    Args:
        url: Address of the portal's landing page.
        user_id: Login id typed into the sign-in form.
        password: Password typed into the sign-in form.

    Returns:
        A 5-tuple ``(patient_data_by_year, claim_data_by_year, mc_data_by_year,
        monthly_patient_data, status)``. The first three are dicts keyed by the
        ints 2024 and 2025 holding DataFrames (empty when nothing was scraped),
        ``monthly_patient_data`` maps ``'YYYY-MM'`` to that month's DataFrame,
        and ``status`` is a message string. If login or another step outside the
        per-period handlers fails, the first four items are ``None`` and
        ``status`` starts with ``"Error: "``.
    """
    edge_options = Options()
    driver = webdriver.Edge(service=Service(EdgeChromiumDriverManager().install()), options=edge_options)

    # Dictionary to store monthly and yearly data
    monthly_patient_data = {}
    patient_data_by_year = {2024: pd.DataFrame(), 2025: pd.DataFrame()}
    mc_data_by_year = {2024: pd.DataFrame(), 2025: pd.DataFrame()}
    claim_data_by_year = {2024: pd.DataFrame(), 2025: pd.DataFrame()}
    current_date = datetime.now().strftime('%Y-%m-%d')

    try:
        _portal_login(driver, url, user_id, password)

        # --- Patient Analysis: one search per month, up to the current date ---
        all_date_ranges = get_month_date_ranges(2024, current_date) + get_month_date_ranges(2025, current_date)
        for start_date, end_date in all_date_ranges:
            month_key = start_date[:7]  # Format: YYYY-MM
            year = int(month_key[:4])
            month_name = datetime.strptime(month_key, '%Y-%m').strftime('%b %Y')

            try:
                _click_when_ready(driver, (By.XPATH, "//a[span[text()='Productivity Reports']]"))
                _click_when_ready(driver, (By.XPATH, "//a[@href='#/Patient_Analysis_Report'][span[text()=' Patient Analysis Report ']]"))
                select_date(driver, start_date, "txtStartDate")
                select_date(driver, end_date, "txtEndDate")
                _click_when_ready(driver, (By.ID, "btnSearch"))
                time.sleep(5)
                _set_grid_page_size(driver, settle_seconds=10)
                patient_df = pd.DataFrame(extract_grid_data_patient_analysis(driver))
                if patient_df.empty:
                    continue

                _coerce_numeric_columns(
                    patient_df, ['Total Visit', 'Total MC (Days)', 'Total Claim (Own)', 'Total Claim (Dep)'])
                patient_df['Total Claim (Combined)'] = patient_df['Total Claim (Own)'] + patient_df['Total Claim (Dep)']
                patient_df['Avg Claim per Visit'] = _safe_ratio(patient_df['Total Claim (Combined)'], patient_df['Total Visit'])
                patient_df['Avg MC per Visit'] = _safe_ratio(patient_df['Total MC (Days)'], patient_df['Total Visit'])
                patient_df['Avg Claim per MC'] = _safe_ratio(patient_df['Total Claim (Combined)'], patient_df['Total MC (Days)'])
                patient_df['Month'] = month_name
                monthly_patient_data[month_key] = patient_df

                # Stack this month under its year; ignore_index avoids repeated
                # row labels in the combined frame.
                if year in patient_data_by_year:
                    so_far = patient_data_by_year[year]
                    patient_data_by_year[year] = (
                        patient_df if so_far.empty
                        else pd.concat([so_far, patient_df], ignore_index=True))
            except Exception as e:
                print(f"Error scraping patient data for {month_name}: {str(e)}")

        # --- MC by Provider: one search per year ---
        for period, start_date, end_date in [
            (2024, "2024-01-01", "2024-12-31"),
            (2025, "2025-01-01", current_date)
        ]:
            try:
                _click_when_ready(driver, (By.XPATH, "//a[span[text()='Productivity Reports']]"))
                _click_when_ready(driver, (By.XPATH, "//a[@href='#/MC_HealthCare_By_Provider'][span[text()=' MC by Provider ']]"))
                # This report uses 60-second pauses (kept from the original flow).
                time.sleep(60)
                select_date(driver, start_date, "txtStartDate")
                select_date(driver, end_date, "txtEndDate")
                _click_when_ready(driver, (By.ID, "btnSearch"))
                time.sleep(60)
                _set_grid_page_size(driver, settle_seconds=60)
                mc_df = pd.DataFrame(extract_grid_data_mc(driver))

                if not mc_df.empty:
                    _coerce_numeric_columns(mc_df, ['Total MC Given', 'No. of Visit'])
                    mc_df['% MC Given'] = _safe_ratio(mc_df['Total MC Given'], mc_df['No. of Visit']) * 100
                    mc_data_by_year[period] = mc_df
            except Exception as e:
                print(f"Error scraping MC data for {period}: {str(e)}")

        # --- Claim Summary by Providers: one search per year ---
        # NOTE(review): select_date_month_day sets only month and day, so the
        # year part of these dates is not applied to the picker, and the 2024
        # range ends on "2025-01-01" whereas the MC range above ends on
        # "2024-12-31" -- confirm this is what the report page expects.
        for period, start_date, end_date in [
            (2024, "2024-01-01", "2025-01-01"),
            (2025, "2025-01-01", current_date)
        ]:
            try:
                _click_when_ready(driver, (By.XPATH, "//a[.//span[contains(text(), 'Registration') and contains(text(), 'Claims')]]"))
                _click_when_ready(driver, (By.XPATH, "//a[@href='#/Claim_Summary_by_Provider_Analysis'][span[text()=' Claim Summary by Providers ']]"))
                time.sleep(5)
                select_date_month_day(driver, start_date, "txtFromDate")
                select_date_month_day(driver, end_date, "txtToDate")
                search_button = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.ID, "btnSearch")))
                # This page's search button is clicked through JavaScript.
                driver.execute_script("arguments[0].click();", search_button)
                time.sleep(10)
                _set_grid_page_size(driver, settle_seconds=5)
                claim_df = pd.DataFrame(extract_grid_data_clm_summary(driver))

                if not claim_df.empty:
                    _coerce_numeric_columns(claim_df, ['No of Visits', 'Total Claim', 'Total MC (Days)'])
                    claim_df['Avg Claim per Visit'] = _safe_ratio(claim_df['Total Claim'], claim_df['No of Visits'])
                    claim_data_by_year[period] = claim_df
            except Exception as e:
                print(f"Error scraping claim data for {period}: {str(e)}")

        return patient_data_by_year, claim_data_by_year, mc_data_by_year, monthly_patient_data, "Data scraped successfully!"
    except Exception as e:
        return None, None, None, None, f"Error: {str(e)}"
    finally:
        # Always release the browser, whether scraping succeeded or not.
        driver.quit()

# --- Plotting Functions ---
def _apply_chart_style():
    """Install the seaborn/matplotlib styling shared by the yearly charts."""
    sns.set(style="whitegrid", palette="muted")
    plt.rcParams.update({
        'font.family': 'Verdana', 'font.size': 12, 'axes.titlesize': 16,
        'axes.labelsize': 14, 'xtick.labelsize': 11, 'ytick.labelsize': 11,
        'axes.titleweight': 'bold', 'axes.linewidth': 1.5, 'grid.linestyle': ':',
        'grid.alpha': 0.5, 'figure.facecolor': '#f5f6f5', 'axes.facecolor': '#ffffff',
        'axes.edgecolor': '#333333', 'axes.labelcolor': '#333333', 'text.color': '#333333'
    })


def _ranked_bar_figure(data, value_col, label_col, *, title, value_label, label_label,
                       palette, fmt, offset, horizontal=True, ascending=False,
                       limit=10, figsize=(12, 6)):
    """Draw a bar chart of the first ``limit`` rows of ``data`` ranked by ``value_col``.

    ``fmt`` turns each value into its annotation text and ``offset`` shifts the
    annotation away from the bar end. Returns the (already closed) figure.
    """
    ranked = data.sort_values(value_col, ascending=ascending).head(limit)
    fig = plt.figure(figsize=figsize)
    x_col, y_col = (value_col, label_col) if horizontal else (label_col, value_col)
    ax = sns.barplot(data=ranked, x=x_col, y=y_col, hue=label_col, palette=palette,
                     legend=False, edgecolor='black', linewidth=0.5)
    ax.set_title(title, pad=15)
    if horizontal:
        ax.set_xlabel(value_label)
        ax.set_ylabel(label_label)
    else:
        ax.set_xlabel(label_label)
        ax.set_ylabel(value_label)
        plt.xticks(rotation=45, ha='right')
    for pos, value in enumerate(ranked[value_col]):
        if not np.isfinite(value):
            # NaN/inf cannot be formatted as a count or placed on the axis.
            continue
        if horizontal:
            ax.text(value + offset, pos, fmt(value), va='center', fontsize=10, color='#333333')
        else:
            ax.text(pos, value + offset, fmt(value), ha='center', fontsize=10, color='#333333')
    plt.tight_layout()
    plt.close(fig)
    return fig


def _division_pie_figure(patient_df, title):
    """Draw the claim share per division; returns None when no division has a positive total."""
    division_claims = patient_df.groupby('Division/Department')['Total Claim (Combined)'].sum()
    # Zero totals would only add empty, overlapping wedge labels.
    division_claims = division_claims[division_claims > 0]
    if division_claims.empty:
        return None
    fig = plt.figure(figsize=(10, 6))
    plt.pie(division_claims, labels=division_claims.index, autopct='%1.1f%%', colors=sns.color_palette('muted'),
            startangle=90, textprops={'fontsize': 11, 'color': '#333333'}, wedgeprops={'edgecolor': 'black', 'linewidth': 0.5})
    plt.title(title, pad=15)
    plt.tight_layout()
    plt.close(fig)
    return fig


def _yearly_employee_totals(patient_df):
    """Collapse stacked monthly rows into one row per employee with yearly totals and averages."""
    key_cols = [c for c in ('Employee No', 'Employee Name') if c in patient_df.columns]
    sum_cols = ['Total Visit', 'Total MC (Days)', 'Total Claim (Combined)']
    totals = patient_df.groupby(key_cols, as_index=False, dropna=False)[sum_cols].sum()
    # Employees with zero visits get NaN averages instead of +/-inf.
    visits = totals['Total Visit'].replace(0, np.nan)
    totals['Avg Claim per Visit'] = totals['Total Claim (Combined)'] / visits
    totals['Avg MC per Visit'] = totals['Total MC (Days)'] / visits
    return totals


def generate_yearly_charts(patient_data_by_year, claim_data_by_year, mc_data_by_year, year, show_mc_pct=True, mc_sort_order="desc"):
    """Render the provider and employee dashboards for one year as image arrays.

    The yearly patient frame holds one row per employee per month, so it is
    first summed per employee; otherwise the same employee could occupy several
    "top 10" slots and the value labels would not line up with the bars.

    Args:
        patient_data_by_year: Dict mapping year (int) to the patient DataFrame.
        claim_data_by_year: Dict mapping year (int) to the claim-summary DataFrame.
        mc_data_by_year: Dict mapping year (int) to the MC-by-provider DataFrame.
        year: Year to plot; converted with ``int()`` for the lookups.
        show_mc_pct: When False, the "% MC Given" slot is ``None``.
        mc_sort_order: ``"asc"`` sorts the "% MC Given" chart ascending; any
            other value sorts it descending.

    Returns:
        ``(provider_images, employee_images)``: a list of 5 and a list of 6
        entries, each the result of ``fig_to_image`` or ``None`` for a chart
        that was not drawn. Returns ``(None, None)`` when any input dict is
        empty/None or any of the three frames for ``year`` is empty.
    """
    if not patient_data_by_year or not claim_data_by_year or not mc_data_by_year:
        return None, None

    year_int = int(year)
    patient_df = patient_data_by_year.get(year_int, pd.DataFrame())
    claim_df = claim_data_by_year.get(year_int, pd.DataFrame())
    mc_df = mc_data_by_year.get(year_int, pd.DataFrame())

    if patient_df.empty or claim_df.empty or mc_df.empty:
        return None, None

    _apply_chart_style()

    def whole(value):
        return f'{int(value)}'

    money = '{:,.2f}'.format
    two_dp = '{:.2f}'.format

    # --- Provider Dashboard Charts ---
    provider_charts = [
        _ranked_bar_figure(mc_df, 'No. of Visit', 'Provider',
                           title=f'Top 10 Providers by Total Visits ({year})',
                           value_label='Total Visits', label_label='Provider',
                           palette='Blues_r', fmt=whole, offset=0.5),
        _ranked_bar_figure(mc_df, 'Total MC Given', 'Provider',
                           title=f'Top 10 Providers by Total MC Given ({year})',
                           value_label='Total MC (Days)', label_label='Provider',
                           palette='Greens_r', fmt=whole, offset=0.5),
    ]

    if show_mc_pct:
        # Candidates are the union of the top-10 providers by visits and by MC given.
        top_visits_provs = set(mc_df.sort_values('No. of Visit', ascending=False).head(10)['Provider'])
        top_mc_provs = set(mc_df.sort_values('Total MC Given', ascending=False).head(10)['Provider'])
        candidates = mc_df[mc_df['Provider'].isin(top_visits_provs | top_mc_provs)]
        provider_charts.append(_ranked_bar_figure(
            candidates, '% MC Given', 'Provider',
            title=f'Top 20 Providers by % MC Given ({year}) - Sorted {"Ascending" if mc_sort_order == "asc" else "Descending"}',
            value_label='% MC Given', label_label='Provider',
            palette='Purples_r', fmt='{:.1f}%'.format, offset=1,
            horizontal=False, ascending=(mc_sort_order == "asc"), limit=20, figsize=(18, 9)))
    else:
        provider_charts.append(None)

    provider_charts.append(_ranked_bar_figure(
        claim_df, 'Total Claim', 'Provider Name',
        title=f'Top 10 Providers by Total Claim ({year})',
        value_label='Total Claim ($)', label_label='Provider',
        palette='Oranges_r', fmt=money, offset=0.5))
    provider_charts.append(_ranked_bar_figure(
        claim_df, 'Avg Claim per Visit', 'Provider Name',
        title=f'Top 10 Providers by Avg Claim per Visit ({year})',
        value_label='Avg Claim per Visit ($)', label_label='Provider',
        palette='Reds_r', fmt=two_dp, offset=0.5, horizontal=False))

    # --- Employee Yearly Charts (one row per employee) ---
    employee_df = _yearly_employee_totals(patient_df)
    employee_charts = [
        _ranked_bar_figure(employee_df, 'Total Visit', 'Employee Name',
                           title=f'Top 10 Employees by Total Visits ({year})',
                           value_label='Total Visits', label_label='Employee',
                           palette='Blues_r', fmt=whole, offset=0.2),
        _ranked_bar_figure(employee_df, 'Total Claim (Combined)', 'Employee Name',
                           title=f'Top 10 Employees by Total Claim ({year})',
                           value_label='Total Claim ($)', label_label='Employee',
                           palette='Oranges_r', fmt=money, offset=1),
        _ranked_bar_figure(employee_df, 'Avg Claim per Visit', 'Employee Name',
                           title=f'Top 10 Employees by Avg Claim per Visit ({year})',
                           value_label='Avg Claim per Visit ($)', label_label='Employee',
                           palette='Reds_r', fmt=two_dp, offset=0.5, horizontal=False),
        _ranked_bar_figure(employee_df, 'Total MC (Days)', 'Employee Name',
                           title=f'Top 10 Employees by Total MC ({year})',
                           value_label='Total MC (Days)', label_label='Employee',
                           palette='Greens_r', fmt=whole, offset=0.2),
        _ranked_bar_figure(employee_df, 'Avg MC per Visit', 'Employee Name',
                           title=f'Top 10 Employees by Avg MC per Visit ({year})',
                           value_label='Avg MC per Visit (Days)', label_label='Employee',
                           palette='Purples_r', fmt=two_dp, offset=0.05, horizontal=False),
        # Division shares are sums, so the un-aggregated rows give the same totals.
        _division_pie_figure(patient_df, f'Claim Distribution by Division ({year})'),
    ]

    provider_images = [fig_to_image(fig) if fig is not None else None for fig in provider_charts]
    employee_images = [fig_to_image(fig) if fig is not None else None for fig in employee_charts]
    return provider_images, employee_images

def generate_monthly_employee_charts(monthly_patient_data, month):
    """Render the six employee charts for a single month as image arrays.

    Args:
        monthly_patient_data: Dict mapping a month key to that month's patient
            DataFrame.
        month: Key of the month to plot. If it is not a key of
            ``monthly_patient_data`` as given, it is also tried as a
            ``'%b %Y'`` label (e.g. ``'Jan 2024'``) converted to ``'YYYY-MM'``.

    Returns:
        A list of six entries in the order: total visits, total claim, average
        claim per visit, total MC, average MC per visit, claim share by
        division. Each entry is the result of ``fig_to_image`` or ``None`` for
        a chart that was not drawn. A list of six ``None`` is returned when
        there is no data for the month.
    """
    if not monthly_patient_data:
        return [None] * 6

    month_key = month
    if month_key not in monthly_patient_data:
        # Accept the human-readable label form as well as the 'YYYY-MM' key form.
        try:
            month_key = datetime.strptime(str(month), '%b %Y').strftime('%Y-%m')
        except ValueError:
            return [None] * 6

    patient_df = monthly_patient_data.get(month_key, pd.DataFrame())
    if patient_df.empty:
        return [None] * 6

    # Professional styling
    sns.set(style="whitegrid", palette="muted")
    plt.rcParams.update({
        'font.family': 'Verdana', 'font.size': 12, 'axes.titlesize': 16,
        'axes.labelsize': 14, 'xtick.labelsize': 11, 'ytick.labelsize': 11,
        'axes.titleweight': 'bold', 'axes.linewidth': 1.5, 'grid.linestyle': ':',
        'grid.alpha': 0.5, 'figure.facecolor': '#f5f6f5', 'axes.facecolor': '#ffffff',
        'axes.edgecolor': '#333333', 'axes.labelcolor': '#333333', 'text.color': '#333333'
    })

    month_label = patient_df["Month"].iloc[0]

    def whole(value):
        return f'{int(value)}'

    money = '{:,.2f}'.format
    two_dp = '{:.2f}'.format

    def top10_chart(value_col, metric, value_label, palette, fmt, offset, horizontal):
        """Bar chart of the 10 employees with the highest ``value_col``."""
        top = patient_df.sort_values(value_col, ascending=False).head(10)
        fig = plt.figure(figsize=(12, 6))
        x_col, y_col = (value_col, 'Employee Name') if horizontal else ('Employee Name', value_col)
        ax = sns.barplot(data=top, x=x_col, y=y_col, hue='Employee Name', palette=palette,
                         legend=False, edgecolor='black', linewidth=0.5)
        ax.set_title(f'Top 10 Employees by {metric} ({month_label})', pad=15)
        if horizontal:
            ax.set_xlabel(value_label)
            ax.set_ylabel('Employee')
        else:
            ax.set_ylabel(value_label)
            ax.set_xlabel('Employee')
            plt.xticks(rotation=45, ha='right')
        for pos, value in enumerate(top[value_col]):
            if not np.isfinite(value):
                # NaN/inf cannot be formatted as a count or placed on the axis.
                continue
            if horizontal:
                ax.text(value + offset, pos, fmt(value), va='center', fontsize=10, color='#333333')
            else:
                ax.text(pos, value + offset, fmt(value), ha='center', fontsize=10, color='#333333')
        plt.tight_layout()
        plt.close(fig)
        return fig

    def division_pie():
        """Pie of claim totals per division; None when no division has a positive total."""
        division_claims = patient_df.groupby('Division/Department')['Total Claim (Combined)'].sum()
        division_claims = division_claims[division_claims > 0]
        if division_claims.empty:
            return None
        fig = plt.figure(figsize=(10, 6))
        plt.pie(division_claims, labels=division_claims.index, autopct='%1.1f%%', colors=sns.color_palette('muted'),
                startangle=90, textprops={'fontsize': 11, 'color': '#333333'}, wedgeprops={'edgecolor': 'black', 'linewidth': 0.5})
        plt.title(f'Claim Distribution by Division ({month_label})', pad=15)
        plt.tight_layout()
        plt.close(fig)
        return fig

    monthly_employee_charts = [
        top10_chart('Total Visit', 'Total Visits', 'Total Visits', 'Blues_r', whole, 0.2, True),
        top10_chart('Total Claim (Combined)', 'Total Claim', 'Total Claim ($)', 'Oranges_r', money, 1, True),
        top10_chart('Avg Claim per Visit', 'Avg Claim per Visit', 'Avg Claim per Visit ($)', 'Reds_r', two_dp, 0.5, False),
        top10_chart('Total MC (Days)', 'Total MC', 'Total MC (Days)', 'Greens_r', whole, 0.2, True),
        top10_chart('Avg MC per Visit', 'Avg MC per Visit', 'Avg MC per Visit (Days)', 'Purples_r', two_dp, 0.05, False),
        division_pie(),
    ]

    return [fig_to_image(fig) if fig is not None else None for fig in monthly_employee_charts]

def fig_to_image(fig):
    """Render a Matplotlib figure to an RGBA pixel array and close the figure.

    Args:
        fig: A Matplotlib figure, or ``None``.

    Returns:
        ``None`` when *fig* is ``None``; otherwise a ``uint8`` NumPy array of
        shape ``(height, width, 4)`` holding the rendered RGBA pixels.
    """
    if fig is None:
        return None
    canvas = fig.canvas
    canvas.draw()
    # buffer_rgba() already exposes the pixels with their true (height, width, 4)
    # shape, so there is no need to reshape using get_width_height(), which
    # reports the logical size and can disagree with the physical buffer when a
    # device pixel ratio other than 1 is in effect.
    # The copy detaches the result from the canvas' internal buffer before the
    # figure is closed below.
    img_array = np.asarray(canvas.buffer_rgba()).copy()
    plt.close(fig)
    return img_array

# --- Gradio Interface ---
with gr.Blocks(title="Claims Analysis Dashboard", css="""
    body { background-color: #f5f6f5; }
    h1, h2 { color: #333333; font-family: Verdana; }
""") as demo:
    gr.Markdown("# Claims Analysis Dashboard (2024 - Present)")

    # Read the clock once so the month choices and the default month cannot
    # disagree if the app happens to start right at a month boundary.
    _now = datetime.now()
    # Every month from Jan 2024 up to and including the current month. Building
    # the list from the current year keeps the default value inside `choices`
    # after 2025 as well.
    _month_choices = [
        datetime(y, m, 1).strftime("%b %Y")
        for y in range(2024, _now.year + 1)
        for m in range(1, 13)
        if (y, m) <= (_now.year, _now.month)
    ]

    # --- Credentials / target site ---
    with gr.Row():
        url_input = gr.Textbox(label="Website URL", placeholder="Enter URL here", lines=1)
        user_id_input = gr.Textbox(label="User ID", placeholder="Enter User ID", lines=1)
        password_input = gr.Textbox(label="Password", type="password", placeholder="Enter Password", lines=1)
    scrape_btn = gr.Button("Submit", variant="primary")

    # --- Chart filters ---
    with gr.Row():
        year_dropdown = gr.Dropdown(
            label="Select Year for Yearly Data",
            choices=["2024", "2025"],
            value="2024",
            allow_custom_value=False
        )
        month_dropdown = gr.Dropdown(
            label="Select Month for Employee Monthly Data",
            choices=_month_choices,
            value=_now.strftime("%b %Y"),
            allow_custom_value=False
        )
        show_mc_pct_checkbox = gr.Checkbox(label="Show % MC Given Chart", value=True)
        mc_sort_dropdown = gr.Dropdown(
            label="Sort % MC Given",
            choices=["desc", "asc"],
            value="desc",
            allow_custom_value=False
        )

    status_output = gr.Textbox(label="Status", lines=2, interactive=False)
    # Scraped data is kept in per-session state so the filter controls can
    # redraw charts without scraping again.
    patient_state = gr.State()
    claim_state = gr.State()
    mc_state = gr.State()
    monthly_patient_state = gr.State()

    with gr.Tabs():
        with gr.TabItem("Provider Insights (Yearly)"):
            gr.Markdown("## Provider Insights Dashboard (Yearly)")
            with gr.Row():
                prov_chart1 = gr.Image(label="Total Visits by Providers", interactive=False)
                prov_chart2 = gr.Image(label="Total MC by Providers", interactive=False)
            with gr.Row():
                prov_chart3 = gr.Image(label="% MC Given by Providers", interactive=False, visible=True)
            with gr.Row():
                prov_chart4 = gr.Image(label="Total Claim by Providers", interactive=False)
                prov_chart5 = gr.Image(label="Average Claim per Visit by Providers", interactive=False)

        with gr.TabItem("Employee Insights (Yearly)"):
            gr.Markdown("## Employee Insights Dashboard (Yearly)")
            with gr.Row():
                emp_chart1 = gr.Image(label="Total Visits by Employees", interactive=False)
                emp_chart2 = gr.Image(label="Total Claim by Employees", interactive=False)
            with gr.Row():
                emp_chart3 = gr.Image(label="Average Claim per Visit by Employees", interactive=False)
                emp_chart4 = gr.Image(label="Total MC by Employees", interactive=False)
            with gr.Row():
                emp_chart5 = gr.Image(label="Average MC per Visit by Employees", interactive=False)
                emp_chart6 = gr.Image(label="Claim Distribution by Division", interactive=False)

        with gr.TabItem("Employee Insights (Monthly)"):
            gr.Markdown("## Employee Insights Dashboard (Monthly)")
            with gr.Row():
                monthly_emp_chart1 = gr.Image(label="Total Visits by Employees", interactive=False)
                monthly_emp_chart2 = gr.Image(label="Total Claim by Employees", interactive=False)
            with gr.Row():
                monthly_emp_chart3 = gr.Image(label="Average Claim per Visit by Employees", interactive=False)
                monthly_emp_chart4 = gr.Image(label="Total MC by Employees", interactive=False)
            with gr.Row():
                monthly_emp_chart5 = gr.Image(label="Average MC per Visit by Employees", interactive=False)
                monthly_emp_chart6 = gr.Image(label="Claim Distribution by Division", interactive=False)

    # Output groups shared by several event handlers below.
    yearly_chart_outputs = [
        prov_chart1, prov_chart2, prov_chart3, prov_chart4, prov_chart5,
        emp_chart1, emp_chart2, emp_chart3, emp_chart4, emp_chart5, emp_chart6
    ]
    monthly_chart_outputs = [
        monthly_emp_chart1, monthly_emp_chart2, monthly_emp_chart3,
        monthly_emp_chart4, monthly_emp_chart5, monthly_emp_chart6
    ]

    def _month_label_to_key(month):
        """Convert a dropdown label such as 'Mar 2025' into the '2025-03' key form."""
        return datetime.strptime(month, "%b %Y").strftime("%Y-%m")

    def scrape_and_store(url, user_id, password, show_mc_pct, mc_sort_order, year="2024", month=None):
        """Scrape the site, store the results in state and draw every chart.

        Returns one value per component wired to the Submit button: the status
        text, the four state payloads, 5 provider charts, 6 yearly employee
        charts and 6 monthly employee charts (22 values in total).
        """
        patient_data_by_year, claim_data_by_year, mc_data_by_year, monthly_patient_data, status = scrape_data(url, user_id, password)
        if patient_data_by_year is None or claim_data_by_year is None or mc_data_by_year is None:
            return status, None, None, None, None, *[None] * 17

        # Draw for the year/month currently selected in the UI so the first
        # render agrees with what the dropdowns show.
        provider_images, employee_images = generate_yearly_charts(
            patient_data_by_year, claim_data_by_year, mc_data_by_year, year or "2024", show_mc_pct, mc_sort_order)
        # The monthly chart function is keyed by 'YYYY-MM' (the same form
        # update_monthly_dashboard passes), not by the display label.
        month_key = _month_label_to_key(month) if month else datetime.now().strftime("%Y-%m")
        monthly_employee_images = generate_monthly_employee_charts(monthly_patient_data, month_key)

        return (
            status, patient_data_by_year, claim_data_by_year, mc_data_by_year,
            monthly_patient_data,
            provider_images[0], provider_images[1], provider_images[2], provider_images[3], provider_images[4],
            employee_images[0], employee_images[1], employee_images[2], employee_images[3], employee_images[4], employee_images[5],
            monthly_employee_images[0], monthly_employee_images[1], monthly_employee_images[2],
            monthly_employee_images[3], monthly_employee_images[4], monthly_employee_images[5]
        )

    def update_yearly_dashboard(year, patient_data_by_year, claim_data_by_year, mc_data_by_year, show_mc_pct, mc_sort_order):
        """Redraw the 11 yearly charts from stored state for the selected year."""
        if not patient_data_by_year or not claim_data_by_year or not mc_data_by_year:
            # Nothing scraped yet: clear every yearly chart.
            return [None] * 11
        provider_images, employee_images = generate_yearly_charts(
            patient_data_by_year, claim_data_by_year, mc_data_by_year, year, show_mc_pct, mc_sort_order)
        return (
            provider_images[0], provider_images[1], provider_images[2], provider_images[3], provider_images[4],
            employee_images[0], employee_images[1], employee_images[2], employee_images[3], employee_images[4], employee_images[5]
        )

    def update_monthly_dashboard(month, monthly_patient_data):
        """Redraw the 6 monthly employee charts for the selected month label."""
        if not month or not monthly_patient_data:
            # No selection or nothing scraped yet: clear the monthly charts
            # instead of failing while parsing the label.
            return [None] * 6
        return generate_monthly_employee_charts(monthly_patient_data, _month_label_to_key(month))

    scrape_btn.click(
        fn=scrape_and_store,
        inputs=[
            url_input, user_id_input, password_input, show_mc_pct_checkbox, mc_sort_dropdown,
            year_dropdown, month_dropdown
        ],
        outputs=[
            status_output, patient_state, claim_state, mc_state,
            monthly_patient_state,
            *yearly_chart_outputs,
            *monthly_chart_outputs
        ]
    )

    # Any of these three controls changes how the yearly charts are drawn, so
    # they all trigger the same redraw with the same inputs and outputs.
    for _yearly_control in (year_dropdown, show_mc_pct_checkbox, mc_sort_dropdown):
        _yearly_control.change(
            fn=update_yearly_dashboard,
            inputs=[year_dropdown, patient_state, claim_state, mc_state, show_mc_pct_checkbox, mc_sort_dropdown],
            outputs=yearly_chart_outputs
        )

    month_dropdown.change(
        fn=update_monthly_dashboard,
        inputs=[month_dropdown, monthly_patient_state],
        outputs=monthly_chart_outputs
    )

# NOTE(review): share=True also publishes a temporary public URL for this app;
# the form accepts site credentials, so confirm public exposure is intended.
demo.launch(share=True)

* Running on local URL:  http://127.0.0.1:7862
* Running on public URL: https://ded0f044a685adcd03.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




Error scraping patient data for Feb 2024: Message: invalid session id: session deleted as the browser has closed the connection
from disconnected: not connected to DevTools
  (Session info: MicrosoftEdge=133.0.3065.92)
Stacktrace:
	GetHandleVerifier [0x00007FF7AE0DE735+13397]
	Microsoft::Applications::Events::EventProperty::empty [0x00007FF7AE36B234+2060404]
	Microsoft::Applications::Events::EventProperty::empty [0x00007FF7AE2C2338+1368440]
	(No symbol) [0x00007FF7ADE59510]
	(No symbol) [0x00007FF7ADE770CA]
	(No symbol) [0x00007FF7ADEDA0E6]
	(No symbol) [0x00007FF7ADEF13AD]
	(No symbol) [0x00007FF7ADED3FC3]
	(No symbol) [0x00007FF7ADEA8B96]
	(No symbol) [0x00007FF7ADEA7E50]
	(No symbol) [0x00007FF7ADEA89C3]
	(No symbol) [0x00007FF7ADF30214]
	(No symbol) [0x00007FF7ADFC06CF]
	(No symbol) [0x00007FF7ADF33A53]
	Microsoft::Applications::Events::EventProperty::to_string [0x00007FF7AE42463D+279981]
	simdutf::get_active_implementation [0x00007FF7AE0678C1+409425]
	simdutf::get_active_implement

Traceback (most recent call last):
  File "C:\Users\DELL-INTERN-HR\anaconda3\Lib\site-packages\gradio\queueing.py", line 625, in process_events
    response = await route_utils.call_process_api(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\DELL-INTERN-HR\anaconda3\Lib\site-packages\gradio\route_utils.py", line 322, in call_process_api
    output = await app.get_blocks().process_api(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\DELL-INTERN-HR\anaconda3\Lib\site-packages\gradio\blocks.py", line 2098, in process_api
    result = await self.call_function(
             ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\DELL-INTERN-HR\anaconda3\Lib\site-packages\gradio\blocks.py", line 1645, in call_function
    prediction = await anyio.to_thread.run_sync(  # type: ignore
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\DELL-INTERN-HR\anaconda3\Lib\site-packages\anyio\to_thread.py", line 56, in run_sync
    return await get

In [1]:
import gradio as gr
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from selenium import webdriver
from selenium.webdriver.edge.service import Service
from selenium.webdriver.edge.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait, Select
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.microsoft import EdgeChromiumDriverManager
from datetime import datetime
import time
import numpy as np
import calendar
import requests
from selenium.common.exceptions import WebDriverException
import traceback

# --- Scraping Functions ---
def wait_for_element(driver, locator, timeout=10):
    """Block until the element matched by *locator* is clickable, then return it.

    Args:
        driver: Selenium WebDriver used for the lookup.
        locator: ``(By.<strategy>, value)`` tuple identifying the element.
        timeout: Maximum number of seconds to wait (default 10).

    Returns:
        Whatever ``WebDriverWait.until`` yields for the clickable condition,
        i.e. the located element.
    """
    waiter = WebDriverWait(driver, timeout)
    is_clickable = EC.element_to_be_clickable(locator)
    return waiter.until(is_clickable)

def select_date_month_day(driver, date_str, date_input_id):
    """Pick the month and day of *date_str* in a datepicker (the year is left as shown).

    Args:
        driver: Selenium WebDriver on the page that holds the date input.
        date_str: Date in ``YYYY-MM-DD`` form. Only its month and day are used.
        date_input_id: DOM id of the input that opens the datepicker.
    """
    date_to_select = datetime.strptime(date_str, '%Y-%m-%d')
    date_input = wait_for_element(driver, (By.ID, date_input_id))
    date_input.click()
    month_select = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, 'ui-datepicker-month')))
    # Month option values are zero-based. The leading '.' scopes the XPath to
    # this <select>; a bare '//option' would search the whole document and
    # could hit an option of some other select that has the same value.
    month_option = month_select.find_element(By.XPATH, f".//option[@value='{date_to_select.month - 1}']")
    month_option.click()
    day = date_to_select.day
    day_element = wait_for_element(driver, (By.XPATH, f"//td[@data-handler='selectDay']/a[text()='{day}']"))
    day_element.click()
    # Fixed pause after the selection before the caller continues.
    time.sleep(2)

def select_date(driver, date_str, date_input_id):
    """Pick the month, year and day of *date_str* in a datepicker.

    Args:
        driver: Selenium WebDriver on the page that holds the date input.
        date_str: Date in ``YYYY-MM-DD`` form.
        date_input_id: DOM id of the input that opens the datepicker.
    """
    date_to_select = datetime.strptime(date_str, '%Y-%m-%d')
    date_input = wait_for_element(driver, (By.ID, date_input_id))
    date_input.click()
    month_select = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, 'ui-datepicker-month')))
    # Month option values are zero-based. The leading '.' keeps the XPath
    # inside this <select>; a bare '//option' is evaluated against the whole
    # document and may match an option that belongs to a different select.
    month_option = month_select.find_element(By.XPATH, f".//option[@value='{date_to_select.month - 1}']")
    month_option.click()
    # The year select is looked up again after the month click rather than
    # reusing an earlier reference.
    year_select = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, 'ui-datepicker-year')))
    year_option = year_select.find_element(By.XPATH, f".//option[@value='{date_to_select.year}']")
    year_option.click()
    day = date_to_select.day
    day_element = wait_for_element(driver, (By.XPATH, f"//td[@data-handler='selectDay']/a[text()='{day}']"))
    day_element.click()
    # Fixed pause after the selection before the caller continues.
    time.sleep(2)

def extract_grid_data_clm_summary(driver):
    """Collect every row of the "Claim Summary by Providers" grid, page by page.

    Args:
        driver: Selenium WebDriver already showing the populated grid.

    Returns:
        A list of dicts with the keys ``'Provider Name'``, ``'No of Visits'``,
        ``'Total Claim'`` and ``'Total MC (Days)'``; values are the raw cell
        text (an empty MC cell becomes ``'0'``). The list is empty when the
        page count cannot be read, and partial when paging stops early.
    """
    data = []
    try:
        total_pages_element = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, "sp_1_pjqgridClmSummbyProv")))
        total_pages = int(total_pages_element.text.strip())
    except Exception:
        # No readable page count: treat the grid as empty. `Exception` (rather
        # than a bare except) lets KeyboardInterrupt/SystemExit propagate.
        return data
    for current_page in range(1, total_pages + 1):
        time.sleep(2)
        driver.execute_script("window.scrollTo(0, 0);")
        grid = WebDriverWait(driver, 10).until(EC.visibility_of_element_located((By.ID, "jqgridClmSummbyProv")))
        rows = WebDriverWait(grid, 10).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, "tr.jqgrow")))
        for row in rows:
            try:
                provider_name = row.find_element(By.CSS_SELECTOR, "td[aria-describedby='jqgridClmSummbyProv_STR_ProvName']").text
                visits = row.find_element(By.CSS_SELECTOR, "td[aria-describedby='jqgridClmSummbyProv_STR_NoOfVisit']").text
                claim = row.find_element(By.CSS_SELECTOR, "td[aria-describedby='jqgridClmSummbyProv_STR_ClmAmt']").text
                total_mc = row.find_element(By.CSS_SELECTOR, "td[aria-describedby='jqgridClmSummbyProv_STR_TotalMC']").text or '0'
                data.append({'Provider Name': provider_name, 'No of Visits': visits, 'Total Claim': claim, 'Total MC (Days)': total_mc})
            except Exception:
                # Best effort: a row that cannot be read is skipped.
                continue
        if current_page < total_pages:
            try:
                next_button_div = wait_for_element(driver, (By.CSS_SELECTOR, "div.btn.btn-sm.btn-default span.fa.fa-forward"), timeout=5)
                driver.execute_script("arguments[0].scrollIntoView(true);", next_button_div)
                next_button_div.click()
                # The old first row going stale signals that the next page has
                # replaced the current one.
                WebDriverWait(driver, 10).until(EC.staleness_of(rows[0]))
            except Exception:
                # Could not advance: keep what has been collected so far.
                break
    return data

def extract_grid_data_patient_analysis(driver):
    """Collect every row of the "Patient Analysis Report" grid, page by page.

    Args:
        driver: Selenium WebDriver already showing the populated grid.

    Returns:
        A list of dicts with the keys ``'Employee Name'``, ``'Employee No'``,
        ``'Division/Department'``, ``'Total Visit'``, ``'Total MC (Days)'``,
        ``'Total Claim (Own)'`` and ``'Total Claim (Dep)'``; values are the
        raw cell text.
    """
    all_data = []
    while True:
        driver.execute_script("window.scrollTo(0, 0);")
        grid = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, "jqgridCorpMcAnalysis")))
        rows = grid.find_elements(By.CSS_SELECTOR, "tr.jqgrow")
        for row in rows:
            try:
                employee_name = row.find_element(By.CSS_SELECTOR, "td[aria-describedby='jqgridCorpMcAnalysis_MEM_NAME']").text
                employee_no = row.find_element(By.CSS_SELECTOR, "td[aria-describedby='jqgridCorpMcAnalysis_MEM_EMPID']").text
                division = row.find_element(By.CSS_SELECTOR, "td[aria-describedby='jqgridCorpMcAnalysis_MEM_EMPDIVISION']").text
                total_visit = row.find_element(By.CSS_SELECTOR, "td[aria-describedby='jqgridCorpMcAnalysis_totalVisit']").text
                total_mc = row.find_element(By.CSS_SELECTOR, "td[aria-describedby='jqgridCorpMcAnalysis_totalMC']").text
                total_claim_own = row.find_element(By.CSS_SELECTOR, "td[aria-describedby='jqgridCorpMcAnalysis_totalClaim_Own']").text
                total_claim_dep = row.find_element(By.CSS_SELECTOR, "td[aria-describedby='jqgridCorpMcAnalysis_totalClaim_Dep']").text
                all_data.append({
                    'Employee Name': employee_name, 'Employee No': employee_no, 'Division/Department': division,
                    'Total Visit': total_visit, 'Total MC (Days)': total_mc, 'Total Claim (Own)': total_claim_own,
                    'Total Claim (Dep)': total_claim_dep
                })
            except Exception:
                # Best effort: a row that cannot be read is skipped.
                continue
        try:
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            next_button = wait_for_element(driver, (By.CSS_SELECTOR, "div.btn.btn-sm.btn-default span.fa.fa-forward"), timeout=5)
            parent_div = next_button.find_element(By.XPATH, "./parent::div")
            # A "disabled" class on the button container means this was the
            # last page.
            if "disabled" in parent_div.get_attribute("class"):
                break
            driver.execute_script("arguments[0].scrollIntoView(true);", parent_div)
            parent_div.click()
            # The old first row going stale signals that the next page loaded.
            # With no rows at all, rows[0] raises and paging ends below.
            WebDriverWait(driver, 10).until(EC.staleness_of(rows[0]))
        except Exception:
            # Could not advance (no button, no rows, or the page never
            # changed): stop with what has been collected. `Exception` rather
            # than a bare except lets KeyboardInterrupt/SystemExit propagate.
            break
    return all_data

def extract_grid_data_mc(driver):
    """Collect every row of the "MC by Provider" grid, page by page.

    Args:
        driver: Selenium WebDriver already showing the populated grid.

    Returns:
        A list of dicts with the keys ``'Provider'``, ``'Total MC Given'`` and
        ``'No. of Visit'``; values are the stripped cell text. The list is
        partial when paging stops early.
    """
    data = []
    try:
        total_pages_element = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, "sp_1_jqgrid")))
        total_pages = int(total_pages_element.text.strip())
    except Exception:
        # No readable page count: still read the page that is showing.
        # `Exception` (rather than a bare except) lets
        # KeyboardInterrupt/SystemExit propagate.
        total_pages = 1
    for current_page in range(1, total_pages + 1):
        time.sleep(2)
        driver.execute_script("window.scrollTo(0, 0);")
        grid = WebDriverWait(driver, 10).until(EC.visibility_of_element_located((By.ID, "jqgrid")))
        rows = WebDriverWait(grid, 10).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, "tr.jqgrow")))
        for row in rows:
            try:
                provider_name = row.find_element(By.CSS_SELECTOR, "td[aria-describedby='jqgrid_STR_ProvName']").text.strip()
                total_mc_given = row.find_element(By.CSS_SELECTOR, "td[aria-describedby='jqgrid_STR_MC_Given_Count']").text.strip()
                total_visits = row.find_element(By.CSS_SELECTOR, "td[aria-describedby='jqgrid_STR_VISITCount']").text.strip()
                data.append({'Provider': provider_name, 'Total MC Given': total_mc_given, 'No. of Visit': total_visits})
            except Exception:
                # Best effort: a row that cannot be read is skipped.
                continue
        if current_page < total_pages:
            try:
                next_button_div = wait_for_element(driver, (By.CSS_SELECTOR, "div.btn.btn-sm.btn-default span.fa.fa-forward"), timeout=5)
                driver.execute_script("arguments[0].scrollIntoView(true);", next_button_div)
                next_button_div.click()
                # The old first row going stale signals that the next page has
                # replaced the current one.
                WebDriverWait(driver, 10).until(EC.staleness_of(rows[0]))
            except Exception:
                # Could not advance: keep what has been collected so far.
                break
    return data

def get_month_date_ranges(year, end_date=None):
    """Return ``(start, end)`` date strings for each month of *year* up to a cutoff.

    Args:
        year: Calendar year to split into months.
        end_date: Cutoff in ``YYYY-MM-DD`` form; defaults to the current
            date and time when omitted.

    Returns:
        A list of ``('YYYY-MM-DD', 'YYYY-MM-DD')`` tuples, one per month from
        January through the cutoff month (or December when *year* is before
        the cutoff year). The final range ends at the cutoff instead of the
        month's last day when the cutoff falls inside that month. The list is
        empty when *year* lies after the cutoff year.
    """
    cutoff = datetime.now() if end_date is None else datetime.strptime(end_date, '%Y-%m-%d')
    if year > cutoff.year:
        # Every month of a future year starts after the cutoff; clamping the
        # end date would yield ranges whose end precedes their start.
        return []
    last_month = cutoff.month if year == cutoff.year else 12
    date_ranges = []
    for month in range(1, last_month + 1):
        month_start = datetime(year, month, 1)
        days_in_month = calendar.monthrange(year, month)[1]
        # Never let a range run past the cutoff.
        month_end = min(datetime(year, month, days_in_month), cutoff)
        date_ranges.append((month_start.strftime('%Y-%m-%d'), month_end.strftime('%Y-%m-%d')))
    return date_ranges

def _merge_into_yearly_totals(yearly_df, month_df):
    """Fold one month's patient rows into the running per-employee totals of a year."""
    if yearly_df.empty:
        # First month with data for this year: it is the running total as-is.
        return month_df
    totals = pd.concat([yearly_df, month_df]).groupby(
        ['Employee Name', 'Employee No', 'Division/Department'], as_index=False).agg({
            'Total Visit': 'sum', 'Total MC (Days)': 'sum', 'Total Claim (Own)': 'sum',
            'Total Claim (Dep)': 'sum', 'Total Claim (Combined)': 'sum'
        })
    # Ratios are recomputed from the summed totals rather than averaged.
    return totals.assign(**{
        'Avg Claim per Visit': lambda x: x['Total Claim (Combined)'] / x['Total Visit'],
        'Avg MC per Visit': lambda x: x['Total MC (Days)'] / x['Total Visit'],
        'Avg Claim per MC': lambda x: x['Total Claim (Combined)'] / x['Total MC (Days)']
    })


def scrape_data(url, user_id, password):
    """Log in to the claims portal and scrape patient, MC and claim reports.

    Three reports are collected in sequence: the Patient Analysis Report for
    each month of 2024 and of 2025 up to today, then "MC by Provider" and
    "Claim Summary by Providers" once per year. Each report is tried up to
    three times; a report that keeps failing is logged with ``print`` and
    skipped, so the returned data may be partial.

    Args:
        url: Address of the portal's landing page.
        user_id: Login id typed into the sign-in form.
        password: Password typed into the sign-in form.

    Returns:
        A 5-tuple ``(patient_data_by_year, claim_data_by_year,
        mc_data_by_year, monthly_patient_data, status)``. The first three are
        dicts keyed by the ints 2024 and 2025 holding DataFrames (empty when
        nothing was scraped); ``monthly_patient_data`` maps ``'YYYY-MM'`` to
        that month's patient DataFrame; ``status`` is a message string. When
        the site is unreachable, the browser cannot be started, or login
        fails, the four data items are ``None`` and ``status`` describes the
        error.
    """
    monthly_patient_data = {}
    patient_data_by_year = {2024: pd.DataFrame(), 2025: pd.DataFrame()}
    mc_data_by_year = {2024: pd.DataFrame(), 2025: pd.DataFrame()}
    claim_data_by_year = {2024: pd.DataFrame(), 2025: pd.DataFrame()}
    current_date = datetime.now().strftime('%Y-%m-%d')

    driver = None
    try:
        # Check reachability before paying for a browser launch.
        response = requests.get(url, timeout=10)
        if response.status_code != 200:
            return None, None, None, None, f"Error: Website inaccessible (Status: {response.status_code})"

        # Created inside the try so a driver download/startup failure is
        # reported through the status string like every other error.
        edge_options = Options()
        service = Service(EdgeChromiumDriverManager().install(), port=0)
        driver = webdriver.Edge(service=service, options=edge_options)

        # --- Login ---
        driver.get(url)
        image = wait_for_element(driver, (By.XPATH, "//img[@src='/ClaimEXMVR/Servlet_LoadImage?SFC=loadImage&imageName=icorporate.png']"))
        image.click()
        user_id_field = WebDriverWait(driver, 10).until(EC.visibility_of_element_located((By.NAME, "txtloginid")))
        user_id_field.send_keys(user_id)
        password_field = WebDriverWait(driver, 10).until(EC.visibility_of_element_located((By.ID, "inputpss")))
        password_field.send_keys(password)
        sign_button = wait_for_element(driver, (By.CSS_SELECTOR, "button.btn.btn-primary[type='submit']"))
        sign_button.click()
        continue_button = wait_for_element(driver, (By.XPATH, "//button[text()='Continue']"))
        continue_button.click()

        date_ranges_2024 = get_month_date_ranges(2024, "2024-12-31")
        date_ranges_2025 = get_month_date_ranges(2025, current_date)
        all_date_ranges = date_ranges_2024 + date_ranges_2025

        # Patient data scraping
        for start_date, end_date in all_date_ranges:
            month_key = start_date[:7]
            year = int(month_key[:4])
            month_name = datetime.strptime(month_key, '%Y-%m').strftime('%b %Y')
            retries = 3
            for attempt in range(retries):
                try:
                    productivity_link = wait_for_element(driver, (By.XPATH, "//a[span[text()='Productivity Reports']]"))
                    productivity_link.click()
                    patient_analysis_link = wait_for_element(driver, (By.XPATH, "//a[@href='#/Patient_Analysis_Report'][span[text()=' Patient Analysis Report ']]"))
                    patient_analysis_link.click()
                    select_date(driver, start_date, "txtStartDate")
                    select_date(driver, end_date, "txtEndDate")
                    search_button = wait_for_element(driver, (By.ID, "btnSearch"))
                    search_button.click()
                    WebDriverWait(driver, 15).until(EC.presence_of_element_located((By.ID, "jqgridCorpMcAnalysis")))
                    # Show 100 rows per page to cut down on paging.
                    dropdown = wait_for_element(driver, (By.CSS_SELECTOR, "select.ui-pg-selbox"))
                    driver.execute_script("arguments[0].scrollIntoView(true);", dropdown)
                    select = Select(dropdown)
                    select.select_by_value("100")
                    time.sleep(5)
                    patient_data = extract_grid_data_patient_analysis(driver)
                    patient_df = pd.DataFrame(patient_data)

                    if not patient_df.empty:
                        numeric_cols = ['Total Visit', 'Total MC (Days)', 'Total Claim (Own)', 'Total Claim (Dep)']
                        for col in numeric_cols:
                            # Unparseable cells become NaN instead of raising.
                            patient_df[col] = pd.to_numeric(patient_df[col], errors='coerce')
                        patient_df['Total Claim (Combined)'] = patient_df['Total Claim (Own)'] + patient_df['Total Claim (Dep)']
                        patient_df['Avg Claim per Visit'] = patient_df['Total Claim (Combined)'] / patient_df['Total Visit']
                        patient_df['Avg MC per Visit'] = patient_df['Total MC (Days)'] / patient_df['Total Visit']
                        patient_df['Avg Claim per MC'] = patient_df['Total Claim (Combined)'] / patient_df['Total MC (Days)']
                        patient_df['Month'] = month_name
                        monthly_patient_data[month_key] = patient_df

                        if year in patient_data_by_year:
                            patient_data_by_year[year] = _merge_into_yearly_totals(
                                patient_data_by_year[year], patient_df)
                    # An empty result is also final for this month: only
                    # exceptions trigger another attempt.
                    break
                except Exception as e:
                    error_details = traceback.format_exc()
                    print(f"Attempt {attempt + 1} failed for {month_name}: {str(e)}\nDetails:\n{error_details}")
                    if attempt == retries - 1:
                        print(f"Error scraping patient data for {month_name}: {str(e)}\nFull traceback:\n{error_details}")
                    time.sleep(5)

        # MC by Provider
        for period, start_date, end_date in [(2024, "2024-01-01", "2024-12-31"), (2025, "2025-01-01", current_date)]:
            retries = 3
            for attempt in range(retries):
                try:
                    productivity_link = wait_for_element(driver, (By.XPATH, "//a[span[text()='Productivity Reports']]"))
                    productivity_link.click()
                    mc_link = wait_for_element(driver, (By.XPATH, "//a[@href='#/MC_HealthCare_By_Provider'][span[text()=' MC by Provider ']]"))
                    mc_link.click()
                    # Long fixed pauses in this section, presumably because
                    # this report is slow to load -- TODO confirm and replace
                    # with explicit waits.
                    time.sleep(60)
                    select_date(driver, start_date, "txtStartDate")
                    select_date(driver, end_date, "txtEndDate")
                    search_button = wait_for_element(driver, (By.ID, "btnSearch"))
                    search_button.click()
                    time.sleep(60)
                    WebDriverWait(driver, 15).until(EC.presence_of_element_located((By.ID, "jqgrid")))
                    dropdown = wait_for_element(driver, (By.CSS_SELECTOR, "select.ui-pg-selbox"))
                    driver.execute_script("arguments[0].scrollIntoView(true);", dropdown)
                    select = Select(dropdown)
                    select.select_by_value("100")
                    time.sleep(50)
                    mc_data = extract_grid_data_mc(driver)
                    mc_df = pd.DataFrame(mc_data)

                    if not mc_df.empty:
                        numeric_cols = ['Total MC Given', 'No. of Visit']
                        for col in numeric_cols:
                            mc_df[col] = pd.to_numeric(mc_df[col], errors='coerce')
                        mc_df['% MC Given'] = (mc_df['Total MC Given'] / mc_df['No. of Visit']) * 100
                        mc_data_by_year[period] = mc_df
                    break
                except Exception as e:
                    error_details = traceback.format_exc()
                    print(f"Attempt {attempt + 1} failed for MC {period}: {str(e)}\nDetails:\n{error_details}")
                    if attempt == retries - 1:
                        print(f"Error scraping MC data for {period}: {str(e)}\nFull traceback:\n{error_details}")
                    time.sleep(5)

        # Claim Summary by Providers
        # NOTE(review): the 2024 period ends on "2025-01-01" here but on
        # "2024-12-31" for MC above, and select_date_month_day only picks month
        # and day (never the year) -- confirm which range the site really uses.
        for period, start_date, end_date in [(2024, "2024-01-01", "2025-01-01"), (2025, "2025-01-01", current_date)]:
            retries = 3
            for attempt in range(retries):
                try:
                    print(f"Attempting to scrape claim data for {period}, attempt {attempt + 1}")
                    reg_claims_link = wait_for_element(driver, (By.XPATH, "//a[.//span[contains(text(), 'Registration') and contains(text(), 'Claims')]]"))
                    print(f"Found Registration & Claims link for {period}")
                    reg_claims_link.click()

                    providers_link = wait_for_element(driver, (By.XPATH, "//a[@href='#/Claim_Summary_by_Provider_Analysis'][span[text()=' Claim Summary by Providers ']]"))
                    print(f"Found Claim Summary by Providers link for {period}")
                    providers_link.click()

                    select_date_month_day(driver, start_date, "txtFromDate")
                    print(f"Set start date {start_date} for {period}")
                    select_date_month_day(driver, end_date, "txtToDate")
                    print(f"Set end date {end_date} for {period}")

                    search_button = wait_for_element(driver, (By.ID, "btnSearch"))
                    print(f"Found search button for {period}")
                    # Clicked through JavaScript here, unlike the native
                    # click() used for the other two reports.
                    driver.execute_script("arguments[0].click();", search_button)

                    WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.ID, "jqgridClmSummbyProv")))
                    print(f"Grid loaded for {period}")

                    dropdown = wait_for_element(driver, (By.CSS_SELECTOR, "select.ui-pg-selbox"))
                    driver.execute_script("arguments[0].scrollIntoView(true);", dropdown)
                    select = Select(dropdown)
                    select.select_by_value("100")
                    print(f"Set rows to 100 for {period}")
                    time.sleep(5)

                    claim_data = extract_grid_data_clm_summary(driver)
                    claim_df = pd.DataFrame(claim_data)

                    if not claim_df.empty:
                        numeric_cols = ['No of Visits', 'Total Claim', 'Total MC (Days)']
                        for col in numeric_cols:
                            claim_df[col] = pd.to_numeric(claim_df[col], errors='coerce')
                        claim_df['Avg Claim per Visit'] = claim_df['Total Claim'] / claim_df['No of Visits']
                        claim_data_by_year[period] = claim_df
                        print(f"Successfully scraped claim data for {period}")
                    else:
                        print(f"No claim data found for {period}")
                    break
                except Exception as e:
                    error_details = traceback.format_exc()
                    print(f"Attempt {attempt + 1} failed for claim {period}: {str(e)}\nDetails:\n{error_details}")
                    if attempt == retries - 1:
                        print(f"Error scraping claim data for {period}: {str(e)}\nFull traceback:\n{error_details}")
                    time.sleep(5)

        return patient_data_by_year, claim_data_by_year, mc_data_by_year, monthly_patient_data, "Data scraped successfully!"
    except Exception as e:
        error_details = traceback.format_exc()
        return None, None, None, None, f"Error: {str(e)}\nFull traceback:\n{error_details}"
    finally:
        # The browser only exists if startup got that far.
        if driver is not None:
            driver.quit()

# --- Plotting Functions ---
def generate_yearly_charts(patient_data_by_year, claim_data_by_year, mc_data_by_year, year, show_mc_pct=True, mc_sort_order="desc"):
    """Build the yearly provider and employee dashboard charts.

    Args:
        patient_data_by_year: Mapping of year (int) to the employee-level
            DataFrame. Columns read here: 'Employee Name', 'Total Visit',
            'Total Claim (Combined)', 'Avg Claim per Visit',
            'Total MC (Days)', 'Avg MC per Visit', 'Division/Department'.
        claim_data_by_year: Mapping of year (int) to the provider claim
            DataFrame ('Provider Name', 'Total Claim', 'Avg Claim per Visit').
        mc_data_by_year: Mapping of year (int) to the provider MC DataFrame
            ('Provider', 'No. of Visit', 'Total MC Given', '% MC Given').
        year: Year to plot; anything ``int()`` accepts (e.g. ``"2024"``).
        show_mc_pct: When False the "% MC Given" chart is skipped and its
            slot in the provider list is None.
        mc_sort_order: ``"asc"`` sorts the "% MC Given" chart ascending; any
            other value sorts it descending.

    Returns:
        tuple[list, list]: ``(provider_images, employee_images)`` holding 5
        and 6 entries. Each entry is the result of ``fig_to_image`` for that
        chart, or None. Both lists are all-None when any input mapping is
        empty/None or any of the three frames for ``year`` is missing/empty.
    """
    if not patient_data_by_year or not claim_data_by_year or not mc_data_by_year:
        return [None] * 5, [None] * 6

    year_int = int(year)
    patient_df = patient_data_by_year.get(year_int, pd.DataFrame())
    claim_df = claim_data_by_year.get(year_int, pd.DataFrame())
    mc_df = mc_data_by_year.get(year_int, pd.DataFrame())

    if patient_df.empty or claim_df.empty or mc_df.empty:
        return [None] * 5, [None] * 6

    # NOTE: both calls below change process-wide seaborn/matplotlib settings.
    sns.set(style="whitegrid", palette="muted")
    plt.rcParams.update({
        'font.family': 'Verdana', 'font.size': 12, 'axes.titlesize': 16,
        'axes.labelsize': 14, 'xtick.labelsize': 11, 'ytick.labelsize': 11,
        'axes.titleweight': 'bold', 'axes.linewidth': 1.5, 'grid.linestyle': ':',
        'grid.alpha': 0.5, 'figure.facecolor': '#f5f6f5', 'axes.facecolor': '#ffffff',
        'axes.edgecolor': '#333333', 'axes.labelcolor': '#333333', 'text.color': '#333333'
    })

    def top10(frame, column):
        """Return the ten rows of *frame* with the largest *column* values."""
        return frame.sort_values(column, ascending=False).head(10)

    def whole(value):
        """Format a bar label as a truncated integer."""
        return str(int(value))

    def bar_figure(ranked, value_col, name_col, palette, title, value_label,
                   name_label, label_text, label_gap, horizontal=True, figsize=(12, 8)):
        """Draw one annotated bar chart for *ranked* and return the closed figure."""
        fig = plt.figure(figsize=figsize)
        try:
            if horizontal:
                ax = sns.barplot(data=ranked, x=value_col, y=name_col, hue=name_col,
                                 palette=palette, legend=False, edgecolor='black', linewidth=0.5)
                ax.set_xlabel(value_label)
                ax.set_ylabel(name_label)
            else:
                ax = sns.barplot(data=ranked, x=name_col, y=value_col, hue=name_col,
                                 palette=palette, legend=False, edgecolor='black', linewidth=0.5)
                ax.set_ylabel(value_label)
                if name_label is not None:
                    ax.set_xlabel(name_label)
                plt.xticks(rotation=45, ha='right')
            ax.set_title(title, pad=20)
            for pos, value in enumerate(ranked[value_col]):
                # NaN/inf cannot be positioned or converted with int(); a
                # single bad value used to abort the whole dashboard.
                if not np.isfinite(value):
                    continue
                if horizontal:
                    ax.text(value + label_gap, pos, label_text(value), va='center', fontsize=10)
                else:
                    ax.text(pos, value + label_gap, label_text(value), ha='center', fontsize=10)
            plt.tight_layout()
        finally:
            # Always release the pyplot handle, even if drawing failed.
            plt.close(fig)
        return fig

    # --- Provider charts ---
    top_prov_visits = top10(mc_df, 'No. of Visit')
    top_prov_mc = top10(mc_df, 'Total MC Given')
    provider_charts = [
        bar_figure(top_prov_visits, 'No. of Visit', 'Provider', 'Blues_r',
                   f'Top 10 Providers by Total Visits ({year})',
                   'Total Visits', 'Provider', whole, 0.5),
        bar_figure(top_prov_mc, 'Total MC Given', 'Provider', 'Greens_r',
                   f'Top 10 Providers by Total MC Given ({year})',
                   'Total MC (Days)', 'Provider', whole, 0.5),
    ]

    if show_mc_pct:
        # Candidates are the union of the two top-10 lists above, so at most
        # 20 providers end up on this chart.
        top_provs = set(top_prov_visits['Provider']) | set(top_prov_mc['Provider'])
        top_prov_mc_pct = mc_df[mc_df['Provider'].isin(top_provs)].sort_values(
            '% MC Given', ascending=(mc_sort_order == "asc")).head(20)
        direction = "Ascending" if mc_sort_order == "asc" else "Descending"
        provider_charts.append(bar_figure(
            top_prov_mc_pct, '% MC Given', 'Provider', 'Purples_r',
            f'Top 20 Providers by % MC Given ({year}) - Sorted {direction}',
            '% MC Given', None, '{:.1f}%'.format, 1,
            horizontal=False, figsize=(18, 10)))
    else:
        provider_charts.append(None)

    provider_charts.append(bar_figure(
        top10(claim_df, 'Total Claim'), 'Total Claim', 'Provider Name', 'Oranges_r',
        f'Top 10 Providers by Total Claim ({year})',
        'Total Claim ($)', 'Provider', '{:,.2f}'.format, 0.5))
    provider_charts.append(bar_figure(
        top10(claim_df, 'Avg Claim per Visit'), 'Avg Claim per Visit', 'Provider Name', 'Reds_r',
        f'Top 10 Providers by Avg Claim per Visit ({year})',
        'Avg Claim per Visit ($)', 'Provider', '{:.2f}'.format, 0.5, horizontal=False))

    # --- Employee charts ---
    employee_charts = [
        bar_figure(top10(patient_df, 'Total Visit'), 'Total Visit', 'Employee Name', 'Blues_r',
                   f'Top 10 Employees by Total Visits ({year})',
                   'Total Visits', 'Employee', whole, 0.2),
        bar_figure(top10(patient_df, 'Total Claim (Combined)'), 'Total Claim (Combined)',
                   'Employee Name', 'Oranges_r',
                   f'Top 10 Employees by Total Claim ({year})',
                   'Total Claim ($)', 'Employee', '{:,.2f}'.format, 1),
        bar_figure(top10(patient_df, 'Avg Claim per Visit'), 'Avg Claim per Visit',
                   'Employee Name', 'Reds_r',
                   f'Top 10 Employees by Avg Claim per Visit ({year})',
                   'Avg Claim per Visit ($)', 'Employee', '{:.2f}'.format, 0.5, horizontal=False),
        bar_figure(top10(patient_df, 'Total MC (Days)'), 'Total MC (Days)',
                   'Employee Name', 'Greens_r',
                   f'Top 10 Employees by Total MC ({year})',
                   'Total MC (Days)', 'Employee', whole, 0.2),
        bar_figure(top10(patient_df, 'Avg MC per Visit'), 'Avg MC per Visit',
                   'Employee Name', 'Purples_r',
                   f'Top 10 Employees by Avg MC per Visit ({year})',
                   'Avg MC per Visit (Days)', 'Employee', '{:.2f}'.format, 0.05, horizontal=False),
    ]

    # Share of combined claims per division.
    pie_fig = plt.figure(figsize=(10, 10))
    try:
        division_claims = patient_df.groupby('Division/Department')['Total Claim (Combined)'].sum()
        plt.pie(division_claims, labels=division_claims.index, autopct='%1.1f%%', colors=sns.color_palette('muted'),
                startangle=90, textprops={'fontsize': 11}, wedgeprops={'edgecolor': 'black', 'linewidth': 0.5})
        plt.title(f'Claim Distribution by Division ({year})', pad=20)
        plt.tight_layout()
    finally:
        plt.close(pie_fig)
    employee_charts.append(pie_fig)

    # fig_to_image maps None to None, so the skipped "% MC" slot stays None.
    provider_images = [fig_to_image(fig) for fig in provider_charts]
    employee_images = [fig_to_image(fig) for fig in employee_charts]
    return provider_images, employee_images

def generate_monthly_employee_charts(monthly_patient_data, month):
    """Build the six employee charts for a single month.

    Args:
        monthly_patient_data: Mapping of month key to the employee-level
            DataFrame for that month. Columns read here: 'Month',
            'Employee Name', 'Total Visit', 'Total Claim (Combined)',
            'Avg Claim per Visit', 'Total MC (Days)', 'Avg MC per Visit'
            and 'Division/Department'.
        month: Key to look up in ``monthly_patient_data``.

    Returns:
        list: Six entries (five top-10 bar charts, then the division pie),
        each the result of ``fig_to_image``. All six are None when the
        mapping is empty/None, the key is absent, or the frame is empty.
    """
    if not monthly_patient_data or month not in monthly_patient_data:
        return [None] * 6

    patient_df = monthly_patient_data[month]
    if patient_df.empty:
        return [None] * 6

    # NOTE: both calls below change process-wide seaborn/matplotlib settings.
    sns.set(style="whitegrid", palette="muted")
    plt.rcParams.update({
        'font.family': 'Verdana', 'font.size': 12, 'axes.titlesize': 16,
        'axes.labelsize': 14, 'xtick.labelsize': 11, 'ytick.labelsize': 11,
        'axes.titleweight': 'bold', 'axes.linewidth': 1.5, 'grid.linestyle': ':',
        'grid.alpha': 0.5, 'figure.facecolor': '#f5f6f5', 'axes.facecolor': '#ffffff',
        'axes.edgecolor': '#333333', 'axes.labelcolor': '#333333', 'text.color': '#333333'
    })

    # The frame's own 'Month' value is what appears in every chart title.
    month_label = patient_df["Month"].iloc[0]

    def whole(value):
        """Format a bar label as a truncated integer."""
        return str(int(value))

    # (value column, palette, title stem, value-axis label,
    #  label formatter, label offset, horizontal bars?)
    bar_specs = [
        ('Total Visit', 'Blues_r', 'Total Visits', 'Total Visits', whole, 0.2, True),
        ('Total Claim (Combined)', 'Oranges_r', 'Total Claim', 'Total Claim ($)', '{:,.2f}'.format, 1, True),
        ('Avg Claim per Visit', 'Reds_r', 'Avg Claim per Visit', 'Avg Claim per Visit ($)', '{:.2f}'.format, 0.5, False),
        ('Total MC (Days)', 'Greens_r', 'Total MC', 'Total MC (Days)', whole, 0.2, True),
        ('Avg MC per Visit', 'Purples_r', 'Avg MC per Visit', 'Avg MC per Visit (Days)', '{:.2f}'.format, 0.05, False),
    ]

    monthly_employee_charts = []
    for value_col, palette, stem, value_label, label_text, label_gap, horizontal in bar_specs:
        ranked = patient_df.sort_values(value_col, ascending=False).head(10)
        fig = plt.figure(figsize=(12, 8))
        try:
            if horizontal:
                ax = sns.barplot(data=ranked, x=value_col, y='Employee Name', hue='Employee Name',
                                 palette=palette, legend=False, edgecolor='black', linewidth=0.5)
                ax.set_xlabel(value_label)
                ax.set_ylabel('Employee')
            else:
                ax = sns.barplot(data=ranked, x='Employee Name', y=value_col, hue='Employee Name',
                                 palette=palette, legend=False, edgecolor='black', linewidth=0.5)
                ax.set_ylabel(value_label)
                ax.set_xlabel('Employee')
                plt.xticks(rotation=45, ha='right')
            ax.set_title(f'Top 10 Employees by {stem} ({month_label})', pad=20)
            for pos, value in enumerate(ranked[value_col]):
                # NaN/inf cannot be positioned or converted with int(); a
                # single bad value used to abort every chart for the month.
                if not np.isfinite(value):
                    continue
                if horizontal:
                    ax.text(value + label_gap, pos, label_text(value), va='center', fontsize=10)
                else:
                    ax.text(pos, value + label_gap, label_text(value), ha='center', fontsize=10)
            plt.tight_layout()
        finally:
            # Always release the pyplot handle, even if drawing failed.
            plt.close(fig)
        monthly_employee_charts.append(fig)

    # Share of combined claims per division.
    pie_fig = plt.figure(figsize=(10, 10))
    try:
        division_claims = patient_df.groupby('Division/Department')['Total Claim (Combined)'].sum()
        plt.pie(division_claims, labels=division_claims.index, autopct='%1.1f%%', colors=sns.color_palette('muted'),
                startangle=90, textprops={'fontsize': 11}, wedgeprops={'edgecolor': 'black', 'linewidth': 0.5})
        plt.title(f'Claim Distribution by Division ({month_label})', pad=20)
        plt.tight_layout()
    finally:
        plt.close(pie_fig)
    monthly_employee_charts.append(pie_fig)

    return [fig_to_image(fig) for fig in monthly_employee_charts]

def fig_to_image(fig):
    """Rasterise a Matplotlib figure into an RGBA ``uint8`` array and close it.

    Args:
        fig: Figure to render, or None.

    Returns:
        A ``(height, width, 4)`` NumPy array built from the canvas RGBA
        buffer, or None when ``fig`` is None. The array wraps the canvas
        buffer via ``np.frombuffer``; no explicit copy is made here.
    """
    if fig is not None:
        canvas = fig.canvas
        canvas.draw()
        flat_pixels = np.frombuffer(canvas.buffer_rgba(), dtype=np.uint8)
        # get_width_height() yields (width, height); image arrays are row-major.
        width, height = canvas.get_width_height()
        image = flat_pixels.reshape((height, width, 4))
        plt.close(fig)
        return image
    return None

# --- Gradio Interface ---
with gr.Blocks(title="Claims Analysis Dashboard", css="""
    body { background-color: #f5f6f5; }
    h1, h2 { color: #333333; font-family: Verdana; }
    .gr-image { max-width: 100%; height: auto; }
""") as demo:
    gr.Markdown("# Claims Analysis Dashboard (2024 - Present)")

    with gr.Row():
        url_input = gr.Textbox(label="Website URL", placeholder="Enter URL here", lines=1)
        user_id_input = gr.Textbox(label="User ID", placeholder="Enter User ID", lines=1)
        password_input = gr.Textbox(label="Password", type="password", placeholder="Enter Password", lines=1)
    scrape_btn = gr.Button("Submit", variant="primary")

    # Months from Jan 2024 up to the current month, capped at Dec 2025 (the
    # last year offered by the year dropdown).
    today = datetime.now()
    month_choices = [
        datetime(y, m, 1).strftime("%b %Y") for y in (2024, 2025) for m in range(1, 13)
    ][:today.month + 12 * (today.year - 2024)]

    with gr.Row():
        year_dropdown = gr.Dropdown(label="Select Year", choices=["2024", "2025"], value="2024")
        month_dropdown = gr.Dropdown(
            label="Select Month",
            choices=month_choices,
            # Default to the newest listed month. Formatting "now" directly
            # produced a value outside `choices` once the date passed Dec 2025.
            value=month_choices[-1] if month_choices else None,
        )
        show_mc_pct_checkbox = gr.Checkbox(label="Show % MC Given Chart", value=True)
        mc_sort_dropdown = gr.Dropdown(label="Sort % MC Given", choices=["desc", "asc"], value="desc")

    status_output = gr.Textbox(label="Status", lines=2, interactive=False)
    # Scraped data is kept in session state so the dropdowns can re-plot
    # without scraping again.
    patient_state = gr.State()
    claim_state = gr.State()
    mc_state = gr.State()
    monthly_patient_state = gr.State()

    with gr.Tabs():
        with gr.TabItem("Provider Insights (Yearly)"):
            gr.Markdown("## Provider Insights Dashboard (Yearly)")
            with gr.Row():
                prov_chart1 = gr.Image(label="Total Visits by Providers")
                prov_chart2 = gr.Image(label="Total MC by Providers")
            with gr.Row():
                prov_chart3 = gr.Image(label="% MC Given by Providers", visible=True)
            with gr.Row():
                prov_chart4 = gr.Image(label="Total Claim by Providers")
                prov_chart5 = gr.Image(label="Average Claim per Visit by Providers")

        with gr.TabItem("Employee Insights (Yearly)"):
            gr.Markdown("## Employee Insights Dashboard (Yearly)")
            with gr.Row():
                emp_chart1 = gr.Image(label="Total Visits by Employees")
                emp_chart2 = gr.Image(label="Total Claim by Employees")
            with gr.Row():
                emp_chart3 = gr.Image(label="Average Claim per Visit by Employees")
                emp_chart4 = gr.Image(label="Total MC by Employees")
            with gr.Row():
                emp_chart5 = gr.Image(label="Average MC per Visit by Employees")
                emp_chart6 = gr.Image(label="Claim Distribution by Division")

        with gr.TabItem("Employee Insights (Monthly)"):
            gr.Markdown("## Employee Insights Dashboard (Monthly)")
            with gr.Row():
                monthly_emp_chart1 = gr.Image(label="Total Visits by Employees")
                monthly_emp_chart2 = gr.Image(label="Total Claim by Employees")
            with gr.Row():
                monthly_emp_chart3 = gr.Image(label="Average Claim per Visit by Employees")
                monthly_emp_chart4 = gr.Image(label="Total MC by Employees")
            with gr.Row():
                monthly_emp_chart5 = gr.Image(label="Average MC per Visit by Employees")
                monthly_emp_chart6 = gr.Image(label="Claim Distribution by Division")

    # Output groups, in the order the callbacks return their images.
    yearly_outputs = [
        prov_chart1, prov_chart2, prov_chart3, prov_chart4, prov_chart5,
        emp_chart1, emp_chart2, emp_chart3, emp_chart4, emp_chart5, emp_chart6,
    ]
    monthly_outputs = [
        monthly_emp_chart1, monthly_emp_chart2, monthly_emp_chart3,
        monthly_emp_chart4, monthly_emp_chart5, monthly_emp_chart6,
    ]

    def scrape_and_store(url, user_id, password, show_mc_pct, mc_sort_order, year, month):
        """Scrape the site, store the frames in state and draw the initial charts.

        Returns 22 values: status text, the four state payloads, then the
        11 yearly and 6 monthly images (None where unavailable).
        """
        patient_data_by_year, claim_data_by_year, mc_data_by_year, monthly_patient_data, status = scrape_data(url, user_id, password)
        if patient_data_by_year is None or claim_data_by_year is None or mc_data_by_year is None:
            return (status, None, None, None, None, *[None] * 17)

        provider_images = [None] * 5
        employee_images = [None] * 6
        monthly_employee_images = [None] * 6

        try:
            # Plot what the dropdowns currently show; previously this was
            # hard-coded to 2024 / the current month regardless of selection.
            provider_images, employee_images = generate_yearly_charts(
                patient_data_by_year, claim_data_by_year, mc_data_by_year,
                year or "2024", show_mc_pct, mc_sort_order)
            selected_month = datetime.strptime(month, "%b %Y") if month else datetime.now()
            monthly_employee_images = generate_monthly_employee_charts(
                monthly_patient_data, selected_month.strftime("%Y-%m"))
        except Exception as e:
            # Scraped data is still returned so the user can retry plotting.
            status += f"\nChart generation failed: {str(e)}"

        return (
            status, patient_data_by_year, claim_data_by_year, mc_data_by_year, monthly_patient_data,
            *provider_images, *employee_images, *monthly_employee_images,
        )

    def update_yearly_dashboard(year, patient_data_by_year, claim_data_by_year, mc_data_by_year, show_mc_pct, mc_sort_order):
        """Re-plot the 11 yearly charts from stored state."""
        # A cleared year dropdown yields None, which int() cannot convert.
        if not year or not patient_data_by_year or not claim_data_by_year or not mc_data_by_year:
            return [None] * 11
        provider_images, employee_images = generate_yearly_charts(
            patient_data_by_year, claim_data_by_year, mc_data_by_year, year, show_mc_pct, mc_sort_order)
        return (*provider_images, *employee_images)

    def update_monthly_dashboard(month, monthly_patient_data):
        """Re-plot the 6 monthly charts for the selected "Mon YYYY" label."""
        # A cleared month dropdown yields None, which strptime cannot parse.
        if not month:
            return [None] * 6
        month_key = datetime.strptime(month, "%b %Y").strftime("%Y-%m")
        return generate_monthly_employee_charts(monthly_patient_data, month_key)

    scrape_btn.click(
        fn=scrape_and_store,
        inputs=[
            url_input, user_id_input, password_input, show_mc_pct_checkbox, mc_sort_dropdown,
            year_dropdown, month_dropdown,
        ],
        outputs=[
            status_output, patient_state, claim_state, mc_state, monthly_patient_state,
            *yearly_outputs, *monthly_outputs,
        ]
    )

    # Any of these three controls re-renders the whole yearly dashboard.
    yearly_inputs = [year_dropdown, patient_state, claim_state, mc_state, show_mc_pct_checkbox, mc_sort_dropdown]
    for control in (year_dropdown, show_mc_pct_checkbox, mc_sort_dropdown):
        control.change(fn=update_yearly_dashboard, inputs=yearly_inputs, outputs=yearly_outputs)

    month_dropdown.change(
        fn=update_monthly_dashboard,
        inputs=[month_dropdown, monthly_patient_state],
        outputs=monthly_outputs
    )

# share=True also publishes a temporary public URL for this app (it appears
# in the launch output).
demo.launch(share=True)

* Running on local URL:  http://127.0.0.1:7860
* Running on public URL: https://b3ac6b304a53e32e79.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [1]:
import gradio as gr
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from selenium import webdriver
from selenium.webdriver.edge.service import Service
from selenium.webdriver.edge.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait, Select
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.microsoft import EdgeChromiumDriverManager
from datetime import datetime
import time
import numpy as np
import calendar
from selenium.common.exceptions import StaleElementReferenceException, TimeoutException, NoSuchElementException

# --- Scraping Functions ---
def wait_for_element(driver, locator, timeout=10):
    """Wait until the element at *locator* is clickable.

    Args:
        driver: Selenium WebDriver to poll.
        locator: ``(By.<strategy>, value)`` tuple identifying the element.
        timeout: Maximum number of seconds to wait.

    Returns:
        The element once it is clickable, or None if the wait timed out
        (a message is printed in that case).
    """
    clickable = EC.element_to_be_clickable(locator)
    try:
        element = WebDriverWait(driver, timeout).until(clickable)
    except TimeoutException:
        print(f"Timeout waiting for element: {locator}")
        element = None
    return element

def select_date(driver, date_str, date_input_id):
    """Pick *date_str* in the date-picker attached to *date_input_id*.

    Clicks the input to open the picker, chooses the month and then the
    year from their dropdowns, and finally clicks the day cell.

    Args:
        driver: Selenium WebDriver on the page containing the input.
        date_str: Target date formatted as ``YYYY-MM-DD``.
        date_input_id: DOM id of the date input element.

    Returns:
        bool: True when the day was clicked; False when any element could
        not be found in time or any exception occurred (which is printed).
    """
    try:
        target = datetime.strptime(date_str, '%Y-%m-%d')

        picker_input = wait_for_element(driver, (By.ID, date_input_id))
        if not picker_input:
            return False
        picker_input.click()
        time.sleep(1)

        # Month options are zero-based in the picker; years use the plain value.
        dropdown_values = (
            ('ui-datepicker-month', str(target.month - 1)),
            ('ui-datepicker-year', str(target.year)),
        )
        for css_class, option_value in dropdown_values:
            dropdown = wait_for_element(driver, (By.CLASS_NAME, css_class))
            if not dropdown:
                return False
            Select(dropdown).select_by_value(option_value)

        day_locator = (By.XPATH, f"//td[@data-handler='selectDay']/a[text()='{target.day}']")
        day_link = wait_for_element(driver, day_locator)
        if not day_link:
            return False
        day_link.click()
        time.sleep(2)
        return True
    except Exception as e:
        print(f"Error selecting date {date_str}: {e}")
        return False

def extract_grid_data_clm_summary(driver):
    """Collect every row of the ``jqgridClmSummbyProv`` grid, page by page.

    The page count is read from the ``sp_1_pjqgridClmSummbyProv`` element;
    if it cannot be read or parsed a single page is assumed. Between pages
    the forward button is clicked via JavaScript.

    Args:
        driver: Selenium WebDriver showing the grid.

    Returns:
        list[dict]: One dict per row with keys 'Provider Name',
        'No of Visits', 'Total Claim' and 'Total MC (Days)'. Values are the
        stripped cell text; an empty MC cell is stored as ``'0'``.
    """
    # Output key -> aria-describedby id of the cell holding that value.
    cell_ids = {
        'Provider Name': 'jqgridClmSummbyProv_STR_ProvName',
        'No of Visits': 'jqgridClmSummbyProv_STR_NoOfVisit',
        'Total Claim': 'jqgridClmSummbyProv_STR_ClmAmt',
        'Total MC (Days)': 'jqgridClmSummbyProv_STR_TotalMC',
    }

    try:
        total_pages_element = wait_for_element(driver, (By.ID, "sp_1_pjqgridClmSummbyProv"))
        total_pages = int(total_pages_element.text.strip()) if total_pages_element else 1
    except Exception:
        # Best effort: an unreadable pager falls back to one page. Unlike a
        # bare ``except:``, this lets KeyboardInterrupt/SystemExit through.
        total_pages = 1

    data = []
    for page_index in range(total_pages):
        time.sleep(2)  # give the grid time to (re)load
        grid = wait_for_element(driver, (By.ID, "jqgridClmSummbyProv"))
        if not grid:
            break
        for row in grid.find_elements(By.CSS_SELECTOR, "tr.jqgrow"):
            try:
                record = {
                    key: row.find_element(
                        By.CSS_SELECTOR, f"td[aria-describedby='{cell_id}']").text.strip()
                    for key, cell_id in cell_ids.items()
                }
            except Exception:
                # Skip rows that cannot be read (e.g. missing or stale cells).
                continue
            record['Total MC (Days)'] = record['Total MC (Days)'] or '0'
            data.append(record)
        if page_index < total_pages - 1:
            next_button = wait_for_element(driver, (By.CSS_SELECTOR, "div.btn.btn-sm.btn-default span.fa.fa-forward"))
            if not next_button:
                break
            driver.execute_script("arguments[0].click();", next_button)
            time.sleep(2)
    return data

def extract_grid_data_patient_analysis(driver):
    """Collect every row of the ``jqgridCorpMcAnalysis`` grid, page by page.

    Pages are walked by clicking the forward button until the grid cannot
    be found, the button cannot be found, or the button's parent ``div``
    carries ``disabled`` in its class attribute.

    Args:
        driver: Selenium WebDriver showing the grid.

    Returns:
        list[dict]: One dict per row with keys 'Employee Name',
        'Employee No', 'Division/Department', 'Total Visit',
        'Total MC (Days)', 'Total Claim (Own)' and 'Total Claim (Dep)'.
        Values are the stripped cell text.
    """
    # Output key -> aria-describedby id of the cell holding that value.
    cell_ids = {
        'Employee Name': 'jqgridCorpMcAnalysis_MEM_NAME',
        'Employee No': 'jqgridCorpMcAnalysis_MEM_EMPID',
        'Division/Department': 'jqgridCorpMcAnalysis_MEM_EMPDIVISION',
        'Total Visit': 'jqgridCorpMcAnalysis_totalVisit',
        'Total MC (Days)': 'jqgridCorpMcAnalysis_totalMC',
        'Total Claim (Own)': 'jqgridCorpMcAnalysis_totalClaim_Own',
        'Total Claim (Dep)': 'jqgridCorpMcAnalysis_totalClaim_Dep',
    }

    data = []
    while True:
        grid = wait_for_element(driver, (By.ID, "jqgridCorpMcAnalysis"))
        if not grid:
            break
        for row in grid.find_elements(By.CSS_SELECTOR, "tr.jqgrow"):
            try:
                record = {
                    key: row.find_element(
                        By.CSS_SELECTOR, f"td[aria-describedby='{cell_id}']").text.strip()
                    for key, cell_id in cell_ids.items()
                }
            except Exception:
                # Skip rows that cannot be read (e.g. missing or stale cells).
                # Unlike a bare ``except:``, KeyboardInterrupt still propagates.
                continue
            data.append(record)

        next_button = wait_for_element(driver, (By.CSS_SELECTOR, "div.btn.btn-sm.btn-default span.fa.fa-forward"))
        if not next_button:
            break
        # get_attribute may return None; treat that as "no classes" instead
        # of failing on the membership test.
        parent_classes = next_button.find_element(By.XPATH, "./parent::div").get_attribute("class") or ""
        if "disabled" in parent_classes:
            break
        driver.execute_script("arguments[0].click();", next_button)
        time.sleep(2)
    return data

def extract_grid_data_mc(driver):
    """Collect every row of the ``jqgrid`` (provider MC) grid, page by page.

    The page count is read from the ``sp_1_jqgrid`` element; if it cannot
    be read or parsed a single page is assumed. Between pages the forward
    button is clicked via JavaScript.

    Args:
        driver: Selenium WebDriver showing the grid.

    Returns:
        list[dict]: One dict per row with keys 'Provider',
        'Total MC Given' and 'No. of Visit'. Values are the stripped cell
        text.
    """
    # Output key -> aria-describedby id of the cell holding that value.
    cell_ids = {
        'Provider': 'jqgrid_STR_ProvName',
        'Total MC Given': 'jqgrid_STR_MC_Given_Count',
        'No. of Visit': 'jqgrid_STR_VISITCount',
    }

    try:
        total_pages_element = wait_for_element(driver, (By.ID, "sp_1_jqgrid"))
        total_pages = int(total_pages_element.text.strip()) if total_pages_element else 1
    except Exception:
        # Best effort: an unreadable pager falls back to one page. Unlike a
        # bare ``except:``, this lets KeyboardInterrupt/SystemExit through.
        total_pages = 1

    data = []
    for page_index in range(total_pages):
        time.sleep(2)  # give the grid time to (re)load
        grid = wait_for_element(driver, (By.ID, "jqgrid"))
        if not grid:
            break
        for row in grid.find_elements(By.CSS_SELECTOR, "tr.jqgrow"):
            try:
                record = {
                    key: row.find_element(
                        By.CSS_SELECTOR, f"td[aria-describedby='{cell_id}']").text.strip()
                    for key, cell_id in cell_ids.items()
                }
            except Exception:
                # Skip rows that cannot be read (e.g. missing or stale cells).
                continue
            data.append(record)
        if page_index < total_pages - 1:
            next_button = wait_for_element(driver, (By.CSS_SELECTOR, "div.btn.btn-sm.btn-default span.fa.fa-forward"))
            if not next_button:
                break
            driver.execute_script("arguments[0].click();", next_button)
            time.sleep(2)
    return data

def get_month_date_ranges(year, end_date=None):
    """Return ``(start, end)`` date strings for each month of *year* up to a cutoff.

    Args:
        year: Calendar year to split into months.
        end_date: Cutoff as ``YYYY-MM-DD``; defaults to the current date.

    Returns:
        list[tuple[str, str]]: One ``(first_day, last_day)`` pair per month,
        formatted ``YYYY-MM-DD``. Months after the cutoff are omitted and
        the cutoff month ends on the cutoff date itself. A year entirely
        after the cutoff yields an empty list.
    """
    cutoff = datetime.now() if end_date is None else datetime.strptime(end_date, '%Y-%m-%d')
    date_ranges = []
    for month in range(1, 13):
        # Comparing (year, month) pairs also stops for years after the cutoff
        # year; checking the month only within the cutoff year produced
        # ranges whose end preceded their start.
        if (year, month) > (cutoff.year, cutoff.month):
            break
        last_day = calendar.monthrange(year, month)[1]
        month_start = datetime(year, month, 1)
        month_end = min(datetime(year, month, last_day), cutoff)
        date_ranges.append((month_start.strftime('%Y-%m-%d'), month_end.strftime('%Y-%m-%d')))
    return date_ranges

def _fetch_report_frame(driver, menu_xpath, report_xpath, date_input_ids,
                        start_date, end_date, extract_rows, js_search_click=False):
    """Open one report page, search a date range and return its grid as a DataFrame.

    Args:
        driver: Active Selenium WebDriver, already logged in.
        menu_xpath: XPath of the side-menu entry that contains the report link.
        report_xpath: XPath of the report link itself.
        date_input_ids: ``(start_input_id, end_input_id)`` element ids of the
            two date pickers on the report page.
        start_date: Range start as ``'YYYY-MM-DD'``.
        end_date: Range end as ``'YYYY-MM-DD'``.
        extract_rows: Callable taking the driver and returning the grid rows as
            a list of dicts.
        js_search_click: Click the search button through JavaScript instead of
            a native Selenium click.

    Returns:
        The extracted rows as a DataFrame, or an empty DataFrame when the dates
        could not be selected or a required control was not available.
    """
    # NOTE(review): the truthiness checks below are kept from the original flow.
    # The wait_for_element defined at the top of this file raises on timeout
    # instead of returning a falsy value — confirm which definition is active.
    menu_link = wait_for_element(driver, (By.XPATH, menu_xpath))
    if menu_link:
        menu_link.click()
    report_link = wait_for_element(driver, (By.XPATH, report_xpath))
    if report_link:
        report_link.click()

    start_input_id, end_input_id = date_input_ids
    if not (select_date(driver, start_date, start_input_id) and select_date(driver, end_date, end_input_id)):
        return pd.DataFrame()

    search_button = wait_for_element(driver, (By.ID, "btnSearch"))
    if not search_button:
        return pd.DataFrame()
    if js_search_click:
        driver.execute_script("arguments[0].click();", search_button)
    else:
        search_button.click()
    time.sleep(5)

    dropdown = wait_for_element(driver, (By.CSS_SELECTOR, "select.ui-pg-selbox"))
    if not dropdown:
        return pd.DataFrame()
    # Ask the grid pager for 100 rows per page before reading the grid.
    Select(dropdown).select_by_value("100")
    time.sleep(5)
    return pd.DataFrame(extract_rows(driver))


def _coerce_numeric_columns(frame, columns):
    """Convert ``columns`` of ``frame`` to numbers in place; unparseable cells become 0."""
    for col in columns:
        frame[col] = pd.to_numeric(frame[col], errors='coerce').fillna(0)


def scrape_data(url, user_id, password):
    """Log in to the claims portal and scrape the patient, MC and claim reports.

    Args:
        url: Address of the portal login page.
        user_id: Login id typed into the ``txtloginid`` field.
        password: Password typed into the ``inputpss`` field.

    Returns:
        A 5-tuple ``(patient_data_by_year, claim_data_by_year, mc_data_by_year,
        monthly_patient_data, status)``. The first three are dicts keyed by
        year (2024, 2025) holding DataFrames (empty when nothing was scraped);
        ``monthly_patient_data`` maps ``'YYYY-MM'`` to that month's patient
        DataFrame; ``status`` is a human-readable message. On any failure the
        four data items are ``None`` and ``status`` describes the error.
    """
    years = (2024, 2025)
    productivity_menu_xpath = "//a[span[text()='Productivity Reports']]"
    monthly_patient_data = {}
    patient_frames_by_year = {y: [] for y in years}
    patient_data_by_year = {y: pd.DataFrame() for y in years}
    mc_data_by_year = {y: pd.DataFrame() for y in years}
    claim_data_by_year = {y: pd.DataFrame() for y in years}
    current_date = datetime.now().strftime('%Y-%m-%d')
    # ISO date strings order lexicographically, so min() caps the 2025 range at
    # year end; without the cap, runs after 2025 would file later data under 2025.
    end_of_2025 = min(current_date, "2025-12-31")
    driver = None

    try:
        # Created inside the try so that a driver download/start-up failure is
        # reported through the status tuple like every other error.
        edge_options = Options()
        driver = webdriver.Edge(service=Service(EdgeChromiumDriverManager().install()), options=edge_options)

        driver.get(url)
        image = wait_for_element(driver, (By.XPATH, "//img[@src='/ClaimEXMVR/Servlet_LoadImage?SFC=loadImage&imageName=icorporate.png']"))
        if not image:
            return None, None, None, None, "Login image not found"
        image.click()
        user_id_field = wait_for_element(driver, (By.NAME, "txtloginid"))
        if user_id_field:
            user_id_field.send_keys(user_id)
        password_field = wait_for_element(driver, (By.ID, "inputpss"))
        if password_field:
            password_field.send_keys(password)
        sign_button = wait_for_element(driver, (By.CSS_SELECTOR, "button.btn.btn-primary[type='submit']"))
        if sign_button:
            sign_button.click()
        continue_button = wait_for_element(driver, (By.XPATH, "//button[text()='Continue']"))
        if continue_button:
            continue_button.click()
        else:
            return None, None, None, None, "Continue button not found"

        # Patient Analysis: one search per calendar month.
        date_ranges = [rng for y in years for rng in get_month_date_ranges(y, current_date)]
        for start_date, end_date in date_ranges:
            month_key = start_date[:7]
            year = int(month_key[:4])
            month_name = datetime.strptime(month_key, '%Y-%m').strftime('%b %Y')
            patient_df = _fetch_report_frame(
                driver,
                productivity_menu_xpath,
                "//a[@href='#/Patient_Analysis_Report'][span[text()=' Patient Analysis Report ']]",
                ("txtStartDate", "txtEndDate"),
                start_date, end_date,
                extract_grid_data_patient_analysis,
            )
            if patient_df.empty:
                continue
            _coerce_numeric_columns(patient_df, ['Total Visit', 'Total MC (Days)', 'Total Claim (Own)', 'Total Claim (Dep)'])
            patient_df['Total Claim (Combined)'] = patient_df['Total Claim (Own)'] + patient_df['Total Claim (Dep)']
            # Zero denominators become NaN so the ratios are NaN instead of inf.
            visits = patient_df['Total Visit'].replace(0, np.nan)
            patient_df['Avg Claim per Visit'] = patient_df['Total Claim (Combined)'] / visits
            patient_df['Avg MC per Visit'] = patient_df['Total MC (Days)'] / visits
            patient_df['Avg Claim per MC'] = patient_df['Total Claim (Combined)'] / patient_df['Total MC (Days)'].replace(0, np.nan)
            patient_df['Month'] = month_name
            monthly_patient_data[month_key] = patient_df
            patient_frames_by_year[year].append(patient_df)

        # Stitch the monthly frames into one frame per year with a fresh index.
        for year, frames in patient_frames_by_year.items():
            if frames:
                patient_data_by_year[year] = pd.concat(frames, ignore_index=True)

        # MC by Provider: one search per year.
        for year, start_date, end_date in [(2024, "2024-01-01", "2024-12-31"), (2025, "2025-01-01", end_of_2025)]:
            mc_df = _fetch_report_frame(
                driver,
                productivity_menu_xpath,
                "//a[@href='#/MC_HealthCare_By_Provider'][span[text()=' MC by Provider ']]",
                ("txtStartDate", "txtEndDate"),
                start_date, end_date,
                extract_grid_data_mc,
            )
            if mc_df.empty:
                continue
            _coerce_numeric_columns(mc_df, ['Total MC Given', 'No. of Visit'])
            mc_df['% MC Given'] = (mc_df['Total MC Given'] / mc_df['No. of Visit'].replace(0, np.nan)) * 100
            mc_data_by_year[year] = mc_df

        # Claim Summary by Providers: one search per year.
        # NOTE(review): the 2024 range ends on 2025-01-01 here but on 2024-12-31
        # for the MC report above — confirm whether this report's "to" date is
        # exclusive or whether this is an off-by-one-day inconsistency.
        for year, start_date, end_date in [(2024, "2024-01-01", "2025-01-01"), (2025, "2025-01-01", end_of_2025)]:
            claim_df = _fetch_report_frame(
                driver,
                "//a[.//span[contains(text(), 'Registration') and contains(text(), 'Claims')]]",
                "//a[@href='#/Claim_Summary_by_Provider_Analysis'][span[text()=' Claim Summary by Providers ']]",
                ("txtFromDate", "txtToDate"),
                start_date, end_date,
                extract_grid_data_clm_summary,
                js_search_click=True,
            )
            if claim_df.empty:
                continue
            _coerce_numeric_columns(claim_df, ['No of Visits', 'Total Claim', 'Total MC (Days)'])
            claim_df['Avg Claim per Visit'] = claim_df['Total Claim'] / claim_df['No of Visits'].replace(0, np.nan)
            claim_data_by_year[year] = claim_df

        return patient_data_by_year, claim_data_by_year, mc_data_by_year, monthly_patient_data, "Data scraped successfully!"
    except Exception as e:
        # Top-level boundary for the UI: report the failure as a status string.
        return None, None, None, None, f"Error: {str(e)}"
    finally:
        if driver is not None:
            driver.quit()

# --- Plotting Functions ---
def _yearly_employee_totals(patient_df):
    """Collapse patient rows into one row of totals per employee.

    The yearly patient frame is built by concatenating one frame per month, so
    an employee can appear once for every month. Summing first makes the
    "Top 10 Employees" charts rank employees rather than employee-month rows.

    NOTE(review): rows are grouped by 'Employee Name' only — confirm that names
    identify employees uniquely in the scraped report.
    """
    totals = patient_df.groupby('Employee Name', as_index=False)[
        ['Total Visit', 'Total Claim (Combined)', 'Total MC (Days)']
    ].sum()
    # Averages must be recomputed from the yearly totals; zero visits -> NaN.
    visits = totals['Total Visit'].replace(0, np.nan)
    totals['Avg Claim per Visit'] = totals['Total Claim (Combined)'] / visits
    totals['Avg MC per Visit'] = totals['Total MC (Days)'] / visits
    return totals


def _top10_bar_figure(data, value_col, label_col, title, palette, horizontal, value_format, ascending=False):
    """Draw a bar chart of the first 10 rows of ``data`` ranked by ``value_col``.

    ``horizontal=True`` puts the values on the x axis and annotates each bar at
    its end; otherwise the labels go on the x axis (rotated 45 degrees) and the
    annotation sits on top of each bar. Returns the closed Figure object.
    """
    fig = plt.figure(figsize=(12, 6))
    top_rows = data.sort_values(value_col, ascending=ascending).head(10)
    if horizontal:
        ax = sns.barplot(data=top_rows, x=value_col, y=label_col, hue=label_col, palette=palette, legend=False)
    else:
        ax = sns.barplot(data=top_rows, x=label_col, y=value_col, hue=label_col, palette=palette, legend=False)
        plt.xticks(rotation=45, ha='right')
    ax.set_title(title)
    for position, value in enumerate(top_rows[value_col]):
        text = value_format.format(value)
        if horizontal:
            ax.text(value, position, text, va='center')
        else:
            ax.text(position, value, text, ha='center')
    plt.tight_layout()
    # Detach from pyplot right away; the Figure object is rendered later.
    plt.close(fig)
    return fig


def generate_yearly_charts(patient_data_by_year, claim_data_by_year, mc_data_by_year, year, show_mc_pct=True, mc_sort_order="desc"):
    """Render the yearly provider and employee charts as image arrays.

    Args:
        patient_data_by_year: Dict mapping year (int) to the patient DataFrame.
        claim_data_by_year: Dict mapping year (int) to the claim-summary DataFrame.
        mc_data_by_year: Dict mapping year (int) to the MC-by-provider DataFrame.
        year: Year to plot; anything accepted by ``int()``.
        show_mc_pct: When false, the "% MC Given" slot is filled with ``None``.
        mc_sort_order: ``"asc"`` ranks "% MC Given" ascending, anything else
            descending.

    Returns:
        ``(provider_images, employee_images)``: a list of 5 provider images in
        the order total visits, total MC given, total claim, % MC given,
        average claim per visit; and a list of 6 employee images in the order
        total visits, total claim, total MC, average claim per visit, average
        MC per visit, claim distribution by division. Both lists contain only
        ``None`` when any of the three frames for the year is missing or empty.
    """
    year_int = int(year)
    patient_df = patient_data_by_year.get(year_int, pd.DataFrame())
    claim_df = claim_data_by_year.get(year_int, pd.DataFrame())
    mc_df = mc_data_by_year.get(year_int, pd.DataFrame())

    if patient_df.empty or claim_df.empty or mc_df.empty:
        return [None] * 5, [None] * 6

    sns.set(style="whitegrid", palette="muted")
    plt.rcParams.update({'font.family': 'Verdana', 'font.size': 12, 'axes.titlesize': 16, 'axes.labelsize': 14})

    # Provider Charts
    provider_charts = [
        _top10_bar_figure(mc_df, 'No. of Visit', 'Provider',
                          f'Top 10 Providers by Total Visits ({year})', 'Blues_r', True, '{:,.0f}'),
        _top10_bar_figure(mc_df, 'Total MC Given', 'Provider',
                          f'Top 10 Providers by Total MC Given ({year})', 'Greens_r', True, '{:,.0f}'),
        _top10_bar_figure(claim_df, 'Total Claim', 'Provider Name',
                          f'Top 10 Providers by Total Claim ({year})', 'Oranges_r', True, '{:,.0f}'),
    ]
    if show_mc_pct:
        # NOTE(review): with mc_sort_order == "asc" this selects the 10 lowest
        # percentages while the title still reads "Top 10" — confirm intent.
        provider_charts.append(
            _top10_bar_figure(mc_df, '% MC Given', 'Provider',
                              f'Top 10 Providers by % MC Given ({year})', 'Purples_r', False, '{:.1f}%',
                              ascending=(mc_sort_order == "asc"))
        )
    else:
        provider_charts.append(None)
    provider_charts.append(
        _top10_bar_figure(claim_df, 'Avg Claim per Visit', 'Provider Name',
                          f'Top 10 Providers by Avg Claim per Visit ({year})', 'Reds_r', False, '{:.2f}')
    )

    # Employee Charts (ranked on per-employee yearly totals)
    employee_df = _yearly_employee_totals(patient_df)
    employee_charts = [
        _top10_bar_figure(employee_df, 'Total Visit', 'Employee Name',
                          f'Top 10 Employees by Total Visits ({year})', 'Blues_r', True, '{:,.0f}'),
        _top10_bar_figure(employee_df, 'Total Claim (Combined)', 'Employee Name',
                          f'Top 10 Employees by Total Claim ({year})', 'Oranges_r', True, '{:,.0f}'),
        _top10_bar_figure(employee_df, 'Total MC (Days)', 'Employee Name',
                          f'Top 10 Employees by Total MC ({year})', 'Greens_r', True, '{:,.0f}'),
        _top10_bar_figure(employee_df, 'Avg Claim per Visit', 'Employee Name',
                          f'Top 10 Employees by Avg Claim per Visit ({year})', 'Reds_r', False, '{:.2f}'),
        _top10_bar_figure(employee_df, 'Avg MC per Visit', 'Employee Name',
                          f'Top 10 Employees by Avg MC per Visit ({year})', 'Purples_r', False, '{:.2f}'),
    ]

    # Division totals are sums, so the un-aggregated rows give the same result.
    pie_fig = plt.figure(figsize=(10, 6))
    division_claims = patient_df.groupby('Division/Department')['Total Claim (Combined)'].sum()
    plt.pie(division_claims, labels=division_claims.index, autopct='%1.1f%%', colors=sns.color_palette('muted'))
    plt.title(f'Claim Distribution by Division ({year})')
    plt.tight_layout()
    plt.close(pie_fig)
    employee_charts.append(pie_fig)

    return (
        [fig_to_image(fig) if fig is not None else None for fig in provider_charts],
        [fig_to_image(fig) for fig in employee_charts],
    )

def generate_monthly_employee_charts(monthly_patient_data, month):
    """Render the six employee charts for a single month as image arrays.

    Args:
        monthly_patient_data: Dict mapping a month key to that month's patient
            DataFrame.
        month: Key to look up in ``monthly_patient_data``.

    Returns:
        A list of six images in the order total visits, total claim, total MC,
        average claim per visit, average MC per visit, claim distribution by
        division; or six ``None`` values when the month has no data.
    """
    month_df = monthly_patient_data.get(month, pd.DataFrame())
    if month_df.empty:
        return [None] * 6

    sns.set(style="whitegrid", palette="muted")
    plt.rcParams.update({'font.family': 'Verdana', 'font.size': 12, 'axes.titlesize': 16, 'axes.labelsize': 14})

    # Prefer the display label stored with the data; fall back to the raw key.
    month_labels = month_df['Month']
    month_name = month if month_labels.empty else month_labels.iloc[0]

    figures = []

    # Horizontal bars: metric on the x axis, one employee per row.
    totals_specs = (
        ('Total Visit', 'Blues_r', f'Top 10 Employees by Total Visits ({month_name})'),
        ('Total Claim (Combined)', 'Oranges_r', f'Top 10 Employees by Total Claim ({month_name})'),
        ('Total MC (Days)', 'Greens_r', f'Top 10 Employees by Total MC ({month_name})'),
    )
    for metric, colours, heading in totals_specs:
        figure = plt.figure(figsize=(12, 6))
        ranked = month_df.sort_values(metric, ascending=False).head(10)
        axes = sns.barplot(data=ranked, x=metric, y='Employee Name', hue='Employee Name', palette=colours, legend=False)
        axes.set_title(heading)
        for row_pos, amount in enumerate(ranked[metric]):
            axes.text(amount, row_pos, f'{amount:,.0f}', va='center')
        plt.tight_layout()
        figures.append(figure)
        plt.close(figure)

    # Vertical bars: one employee per column, labels rotated to stay readable.
    average_specs = (
        ('Avg Claim per Visit', 'Reds_r', f'Top 10 Employees by Avg Claim per Visit ({month_name})'),
        ('Avg MC per Visit', 'Purples_r', f'Top 10 Employees by Avg MC per Visit ({month_name})'),
    )
    for metric, colours, heading in average_specs:
        figure = plt.figure(figsize=(12, 6))
        ranked = month_df.sort_values(metric, ascending=False).head(10)
        axes = sns.barplot(data=ranked, x='Employee Name', y=metric, hue='Employee Name', palette=colours, legend=False)
        axes.set_title(heading)
        plt.xticks(rotation=45, ha='right')
        for col_pos, amount in enumerate(ranked[metric]):
            axes.text(col_pos, amount, f'{amount:.2f}', ha='center')
        plt.tight_layout()
        figures.append(figure)
        plt.close(figure)

    # Share of combined claims per division.
    figure = plt.figure(figsize=(10, 6))
    claims_per_division = month_df.groupby('Division/Department')['Total Claim (Combined)'].sum()
    plt.pie(claims_per_division, labels=claims_per_division.index, autopct='%1.1f%%', colors=sns.color_palette('muted'))
    plt.title(f'Claim Distribution by Division ({month_name})')
    plt.tight_layout()
    figures.append(figure)
    plt.close(figure)

    return list(map(fig_to_image, figures))

def fig_to_image(fig):
    """Render a Matplotlib figure to an RGBA pixel array and close the figure.

    Args:
        fig: Figure whose canvas provides ``draw()`` and ``buffer_rgba()``,
            or ``None``.

    Returns:
        A ``uint8`` array of shape ``(height, width, 4)``, or ``None`` when
        ``fig`` is ``None``.
    """
    if fig is None:
        return None
    canvas = fig.canvas
    canvas.draw()
    # The RGBA buffer already carries its own (rows, cols, 4) layout, so the
    # shape is taken from the buffer itself rather than from a separate
    # get_width_height() call that has to agree with the rendered size.
    # The copy detaches the pixels from the canvas memory before the figure
    # is closed.
    pixels = np.asarray(canvas.buffer_rgba(), dtype=np.uint8).copy()
    plt.close(fig)
    return pixels

# --- Gradio Interface ---
with gr.Blocks(title="Claims Analysis Dashboard") as demo:
    gr.Markdown("# Claims Analysis Dashboard (2024 - Present)")

    with gr.Row():
        url_input = gr.Textbox(label="Website URL", placeholder="Enter URL here")
        user_id_input = gr.Textbox(label="User ID", placeholder="Enter User ID")
        password_input = gr.Textbox(label="Password", type="password", placeholder="Enter Password")
    scrape_btn = gr.Button("Submit", variant="primary")

    # Month labels from Jan 2024 through the current month, limited to the two
    # years the scraper covers. Once the current month is past that window it
    # is not a valid choice, so the default falls back to the latest one.
    today = datetime.now()
    month_choices = [
        datetime(y, m, 1).strftime("%b %Y") for y in (2024, 2025) for m in range(1, 13)
    ][:today.month + 12 * (today.year - 2024)]
    current_month_label = today.strftime("%b %Y")
    if current_month_label in month_choices:
        default_month = current_month_label
    else:
        default_month = month_choices[-1] if month_choices else None

    with gr.Row():
        year_dropdown = gr.Dropdown(label="Select Year", choices=["2024", "2025"], value="2024")
        month_dropdown = gr.Dropdown(
            label="Select Month",
            choices=month_choices,
            value=default_month
        )
        show_mc_pct_checkbox = gr.Checkbox(label="Show % MC Given Chart", value=True)
        mc_sort_dropdown = gr.Dropdown(label="Sort % MC Given", choices=["desc", "asc"], value="desc")

    status_output = gr.Textbox(label="Status", interactive=False)
    patient_state = gr.State()
    claim_state = gr.State()
    mc_state = gr.State()
    monthly_patient_state = gr.State()

    with gr.Tabs():
        with gr.TabItem("Provider Insights (Yearly)"):
            with gr.Row():
                prov_chart1 = gr.Image(label="Total Visits by Providers")
                prov_chart2 = gr.Image(label="Total MC by Providers")
            with gr.Row():
                prov_chart3 = gr.Image(label="% MC Given by Providers")
            with gr.Row():
                prov_chart4 = gr.Image(label="Total Claim by Providers")
                prov_chart5 = gr.Image(label="Average Claim per Visit by Providers")

        with gr.TabItem("Employee Insights (Yearly)"):
            with gr.Row():
                emp_chart1 = gr.Image(label="Total Visits by Employees")
                emp_chart2 = gr.Image(label="Total Claim by Employees")
            with gr.Row():
                emp_chart3 = gr.Image(label="Average Claim per Visit by Employees")
                emp_chart4 = gr.Image(label="Total MC by Employees")
            with gr.Row():
                emp_chart5 = gr.Image(label="Average MC per Visit by Employees")
                emp_chart6 = gr.Image(label="Claim Distribution by Division")

        with gr.TabItem("Employee Insights (Monthly)"):
            with gr.Row():
                monthly_emp_chart1 = gr.Image(label="Total Visits by Employees")
                monthly_emp_chart2 = gr.Image(label="Total Claim by Employees")
            with gr.Row():
                monthly_emp_chart3 = gr.Image(label="Average Claim per Visit by Employees")
                monthly_emp_chart4 = gr.Image(label="Total MC by Employees")
            with gr.Row():
                monthly_emp_chart5 = gr.Image(label="Average MC per Visit by Employees")
                monthly_emp_chart6 = gr.Image(label="Claim Distribution by Division")

    # Output wiring. The chart generators return images in a fixed order that
    # differs from the on-screen numbering, so the components are listed here
    # in generator order to make every image land under its matching label:
    #   providers: visits, MC given, total claim, % MC given, avg claim/visit
    #   employees: visits, total claim, total MC, avg claim/visit, avg MC/visit, division pie
    state_outputs = [patient_state, claim_state, mc_state, monthly_patient_state]
    provider_outputs = [prov_chart1, prov_chart2, prov_chart4, prov_chart3, prov_chart5]
    employee_outputs = [emp_chart1, emp_chart2, emp_chart4, emp_chart3, emp_chart5, emp_chart6]
    monthly_outputs = [
        monthly_emp_chart1, monthly_emp_chart2, monthly_emp_chart4,
        monthly_emp_chart3, monthly_emp_chart5, monthly_emp_chart6,
    ]
    yearly_outputs = provider_outputs + employee_outputs
    scrape_outputs = [status_output] + state_outputs + yearly_outputs + monthly_outputs

    def update_yearly_dashboard(year, patient_data_by_year, claim_data_by_year, mc_data_by_year, show_mc_pct, mc_sort_order):
        """Re-render the yearly provider and employee charts from stored data."""
        if not all([patient_data_by_year, claim_data_by_year, mc_data_by_year]):
            return [None] * len(yearly_outputs)
        prov_images, emp_images = generate_yearly_charts(patient_data_by_year, claim_data_by_year, mc_data_by_year, year, show_mc_pct, mc_sort_order)
        return [*prov_images, *emp_images]

    def update_monthly_dashboard(month, monthly_patient_data):
        """Re-render the monthly employee charts for the selected month label."""
        # A cleared dropdown yields no label; there is nothing to parse or plot.
        if not monthly_patient_data or not month:
            return [None] * len(monthly_outputs)
        month_key = datetime.strptime(month, "%b %Y").strftime("%Y-%m")
        return generate_monthly_employee_charts(monthly_patient_data, month_key)

    def scrape_and_store(url, user_id, password, show_mc_pct, mc_sort_order, year, month):
        """Scrape the portal, keep the data in state and render all charts.

        The charts are rendered for the year and month currently selected in
        the dropdowns so the display matches the controls after a scrape.
        """
        patient_data_by_year, claim_data_by_year, mc_data_by_year, monthly_patient_data, status = scrape_data(url, user_id, password)
        if not all([patient_data_by_year, claim_data_by_year, mc_data_by_year]):
            # Gradio needs exactly one value per output component; only the
            # status message is meaningful on failure.
            return (status, *[None] * (len(scrape_outputs) - 1))

        prov_images, emp_images = generate_yearly_charts(
            patient_data_by_year, claim_data_by_year, mc_data_by_year,
            year or "2024", show_mc_pct, mc_sort_order,
        )
        monthly_emp_images = update_monthly_dashboard(month, monthly_patient_data)

        return (
            status, patient_data_by_year, claim_data_by_year, mc_data_by_year, monthly_patient_data,
            *prov_images, *emp_images, *monthly_emp_images
        )

    scrape_btn.click(
        fn=scrape_and_store,
        inputs=[
            url_input, user_id_input, password_input,
            show_mc_pct_checkbox, mc_sort_dropdown, year_dropdown, month_dropdown,
        ],
        outputs=scrape_outputs
    )

    # Any of the three yearly controls triggers the same yearly refresh.
    for yearly_control in (year_dropdown, show_mc_pct_checkbox, mc_sort_dropdown):
        yearly_control.change(
            fn=update_yearly_dashboard,
            inputs=[year_dropdown, patient_state, claim_state, mc_state, show_mc_pct_checkbox, mc_sort_dropdown],
            outputs=yearly_outputs
        )

    month_dropdown.change(
        fn=update_monthly_dashboard,
        inputs=[month_dropdown, monthly_patient_state],
        outputs=monthly_outputs
    )

# Start the dashboard server.
# NOTE(review): share=True also requests a public gradio.live URL for this app,
# which accepts portal credentials — confirm that public exposure is intended.
demo.launch(share=True)

* Running on local URL:  http://127.0.0.1:7860
* Running on public URL: https://dcab0eb1ef35f407af.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


