In [None]:
!pip install gradio

In [None]:
pip install --upgrade gradio


In [2]:
import gradio as gr
import pandas as pd

def eda_analysis(file):
    """Return a preview (first five rows) of a CSV file.

    Args:
        file: Anything ``pandas.read_csv`` accepts (a path or a file-like
            object), or ``None`` when no file has been provided.

    Returns:
        pandas.DataFrame: The first five rows of the parsed CSV, or an empty
        DataFrame when ``file`` is ``None``.
    """
    # Guard the "nothing uploaded" case so the handler does not raise inside
    # pd.read_csv when it is triggered without a file.
    if file is None:
        return pd.DataFrame()

    return pd.read_csv(file).head()

# Single-function UI: a "file" input is handed to `eda_analysis`, and the
# frame it returns is rendered by a "dataframe" output.
demo = gr.Interface(fn=eda_analysis, inputs="file", outputs="dataframe")

# Start serving the interface.
demo.launch()


* Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.




In [3]:
from functools import partial

dataset_file = "df_cleaned.csv"

# Load the dataset and order the rows by country, then by year.
df = pd.read_csv(dataset_file).sort_values(
    by=['Countries, territories and areas', 'Year']
)


In [4]:
import matplotlib.pyplot as plt
import io
from PIL import Image

def filter_dataset(countries, year_min, year_max, doctor_min):
    """Filter the module-level ``df`` by country, year range and doctor count.

    Every criterion is optional: a value of ``None`` (for example a numeric
    input that has been cleared) means "do not constrain on this field"
    instead of raising a ``TypeError`` during the comparison.

    Args:
        countries: Country names to keep. ``None``, an empty selection, or a
            selection containing ``"All"`` keeps every country. A single
            string is treated as a one-element selection.
        year_min: Inclusive lower bound on ``"Year"``, or ``None``.
        year_max: Inclusive upper bound on ``"Year"``, or ``None``.
        doctor_min: Inclusive lower bound on ``"Medical doctors (number)"``,
            or ``None``.

    Returns:
        tuple: The filtered DataFrame twice (same object), one value per
        output the caller wires this function to.
    """
    filtered = df.copy()

    # A bare string would otherwise be substring-tested by the "All" check
    # and rejected by `isin`.
    if isinstance(countries, str):
        countries = [countries]

    # Filter by the list of countries (skipped for None / empty / "All").
    if countries and "All" not in countries:
        filtered = filtered[filtered["Countries, territories and areas"].isin(countries)]

    # Filter by min and max year; each bound is applied only when provided.
    if year_min is not None:
        filtered = filtered[filtered["Year"] >= year_min]
    if year_max is not None:
        filtered = filtered[filtered["Year"] <= year_max]

    # Doctor-count filter.
    if doctor_min is not None:
        filtered = filtered[filtered["Medical doctors (number)"] >= doctor_min]

    return filtered, filtered


def summary(filtered):
    """Build the descriptive statistics, KPI text and doctor-type chart.

    Args:
        filtered: DataFrame containing the columns read below
            (``"Year"``, ``"Countries, territories and areas"``,
            ``"Medical doctors (number)"``,
            ``"Medical doctors (per 10 000 population)"`` and the three
            doctor-type count columns).

    Returns:
        tuple: ``(desc, kpi_text, pil_img)`` where ``desc`` is
        ``filtered.describe()``, ``kpi_text`` is a Markdown string and
        ``pil_img`` is a PIL image of the doctor-type pie chart (or a
        "No data" placeholder when all three category totals are zero).

    Raises:
        KeyError: If one of the expected columns is missing.
    """
    desc = filtered.describe()

    total_doctors = filtered["Medical doctors (number)"].sum()
    avg_doctors_per_10k = filtered["Medical doctors (per 10 000 population)"].mean()
    num_countries = filtered["Countries, territories and areas"].nunique()
    year_min = filtered["Year"].min()
    year_max = filtered["Year"].max()

    # With no rows (or only missing values) the aggregates are NaN; show a
    # readable placeholder instead of "nan".
    if pd.isna(avg_doctors_per_10k):
        avg_text = "N/A"
    else:
        avg_text = f"{avg_doctors_per_10k:.2f}"
    if pd.isna(year_min) or pd.isna(year_max):
        year_text = "N/A"
    else:
        year_text = f"{year_min} - {year_max}"

    kpi_text = (
        f"**Total doctors:** {total_doctors:,.0f}\n\n"
        f"**Average doctors per 10,000 population:** {avg_text}\n\n"
        f"**Number of countries:** {num_countries}\n\n"
        f"**Year range:** {year_text}"
    )

    generalist = filtered["Generalist medical practitioners (number)"].sum()
    specialist = filtered["Specialist medical practitioners (number)"].sum()
    not_defined = filtered["Medical doctors not further defined (number)"].sum()

    labels = ["Generalist", "Specialist", "Not further defined"]
    sizes = [generalist, specialist, not_defined]
    colors = ["#66b3ff", "#99ff99", "#ffcc99"]

    # Use an explicit figure so it can always be closed, even if drawing or
    # saving raises.
    fig, ax = plt.subplots(figsize=(4, 4))
    try:
        if sum(sizes) > 0:
            ax.pie(sizes, labels=labels, colors=colors, autopct="%1.1f%%", startangle=140)
        else:
            # A pie cannot be normalised when every slice is zero.
            ax.text(0.5, 0.5, "No data", ha="center", va="center", transform=ax.transAxes)
            ax.axis("off")
        ax.set_title("Doctor Types Proportion")
        fig.tight_layout()

        buf = io.BytesIO()
        fig.savefig(buf, format="png")
    finally:
        plt.close(fig)

    buf.seek(0)
    pil_img = Image.open(buf)  # Convert BytesIO to PIL Image
    pil_img.load()  # Decode now so the image no longer depends on the buffer

    return desc, kpi_text, pil_img

def visualization():
    """Return the placeholder text used as the Visualization tab's textbox value."""
    return "tset"


# Three-tab dashboard: "Filter" selects rows of `df`, "Summary" shows the
# statistics/KPIs/chart for the current selection, "Visualization" is a
# placeholder textbox.
with gr.Blocks() as app:

    # State to share filtered data between tabs
    filtered_data = gr.State(df)

    with gr.Tab("Filter"):
        with gr.Row():
            with gr.Column():
                country = gr.Dropdown(
                    choices=sorted(df["Countries, territories and areas"].unique().tolist()),
                    value=[],
                    label="Countries",
                    multiselect=True,
                    allow_custom_value=False,
                    max_choices=50,
                )
                year_min = gr.Number(value=df["Year"].min(), label="Min Year")
                year_max = gr.Number(value=df["Year"].max(), label="Max Year")
                doctor_min = gr.Number(value=0, label="Min Doctors")
                gr.Markdown("💡 **Tip:** Hold Ctrl/Cmd to select multiple countries")
                filter_btn = gr.Button("Apply Filters")
            with gr.Column():
                filtered_df = gr.Dataframe(headers=list(df.columns), interactive=False)

    with gr.Tab("Summary"):
        with gr.Row():
            summary_df = gr.Dataframe(interactive=False, label="Statistical Summary")
        with gr.Row():
            kpi_text = gr.Markdown()
            pie_chart = gr.Image(label="Doctor Types Proportion")

    with gr.Tab("Visualization"):
        test = gr.Textbox(label="Context", value=visualization())

    # Filter tab logic: the filtered frame is shown in the table and stored
    # in the shared state.
    filter_btn.click(
        fn=filter_dataset,
        inputs=[country, year_min, year_max, doctor_min],
        outputs=[filtered_df, filtered_data],
    )

    # Auto-update summary when filtered data changes.
    # Only fn / inputs / outputs are passed: the fourth positional parameter
    # of an event listener is `api_name`, so a component must not be passed
    # there.
    filtered_data.change(
        fn=summary,
        inputs=filtered_data,
        outputs=[summary_df, kpi_text, pie_chart],
    )

app.launch()

* Running on local URL:  http://127.0.0.1:7861

To create a public link, set `share=True` in `launch()`.




In [None]:
# Load the CSV files used by the missing-value analysis below.
# NOTE: this rebinds the module-level `df`, which `filter_dataset` and the
# dashboard built in the earlier cell also read.
df = pd.read_csv("Doctors_by_countries.csv")  # Replace with your actual path
# NOTE(review): `df_clean` is not referenced in the cells visible here — confirm it is still needed.
df_clean = pd.read_csv("df_cleaned.csv") 

In [None]:
pip install missingno


In [None]:
def _render_missing_matrix(frame):
    """Render the missing-value pattern of ``frame`` as a PIL image."""
    fig, ax = plt.subplots(figsize=(12, 6))
    try:
        if frame.empty:
            # Nothing to draw for a frame without rows or columns.
            ax.text(0.5, 0.5, "No data", ha="center", va="center", transform=ax.transAxes)
        else:
            # One image row per column, one image column per data row;
            # with the reversed gray colormap True (missing) is drawn black.
            ax.imshow(frame.isna().T, aspect='auto', cmap='gray_r',
                      interpolation='none', origin='lower')
            ax.set_yticks(list(range(len(frame.columns))))
            ax.set_yticklabels([str(col) for col in frame.columns])
        ax.set_title("Missing Value Patterns\n(Black = Missing, White = Present)")
        ax.set_xlabel("Row Index")
        ax.grid(False)

        buf = io.BytesIO()
        fig.savefig(buf, format='png', bbox_inches='tight', dpi=100)
    finally:
        # Always release the figure, even when drawing or saving fails.
        plt.close(fig)

    buf.seek(0)
    image = Image.open(buf)
    image.load()  # Decode now so the image no longer depends on the buffer
    return image


def _recommend_action(col, pct):
    """Return the cleaning recommendation line for one column."""
    if pct > 20:
        return f"❌ Remove {col} ({pct}% missing)"
    if pct > 5:
        return f"⚠️ Impute {col} ({pct}% missing)"
    return f"✅ Keep {col} ({pct}% missing)"


def analyze_missing():
    """Analyse missing values and duplicates in the module-level ``df``.

    Returns:
        dict: With the keys
            ``"shape"`` (str, "<rows> rows × <cols> columns"),
            ``"dtypes"`` (dict of column name -> dtype),
            ``"missing"`` (DataFrame of missing count / percentage per
            column, sorted by count descending),
            ``"duplicates"`` (DataFrame of every row that has a duplicate),
            ``"actions"`` (list of recommendation strings for columns that
            have missing values),
            ``"matrix"`` (PIL image of the missing-value pattern),
            ``"num_duplicates"`` (int) and
            ``"headers"`` (list of column names).
    """
    frame = df

    # Basic Info
    shape = frame.shape
    dtypes = frame.dtypes.to_dict()

    # Missing Value Analysis
    missing_sum = frame.isna().sum()
    row_count = len(frame)
    if row_count:
        missing_pct = (missing_sum / row_count * 100).round(2)
    else:
        # Avoid 0/0 -> NaN percentages for a frame without rows.
        missing_pct = missing_sum.astype(float)
    missing_report = pd.DataFrame({
        'Missing Count': missing_sum,
        'Missing Percentage': missing_pct
    }).sort_values('Missing Count', ascending=False)

    # Duplicates (keep=False marks every member of a duplicate group)
    duplicates = frame[frame.duplicated(keep=False)]

    # Generate Matrix Visualization
    matrix_img = _render_missing_matrix(frame)

    # Data Cleaning Recommendations, in the report's (descending) order
    affected = missing_report[missing_report['Missing Count'] > 0].index
    actions = [_recommend_action(col, missing_pct[col]) for col in affected]

    return {
        "shape": f"{shape[0]} rows × {shape[1]} columns",
        "dtypes": dtypes,
        "missing": missing_report,
        "duplicates": duplicates,
        "actions": actions,
        "matrix": matrix_img,
        "num_duplicates": len(duplicates),
        "headers": list(frame.columns)
    }

def _frame_to_markdown(frame):
    """Render ``frame`` (index included) as a Markdown table.

    ``DataFrame.to_markdown`` is used when its optional ``tabulate``
    dependency is installed; otherwise a plain pipe table is built by hand.
    """
    try:
        return frame.to_markdown()
    except ImportError:
        header = [""] + [str(col) for col in frame.columns]
        lines = [
            "| " + " | ".join(header) + " |",
            "|" + "---|" * len(header),
        ]
        # itertuples yields the index label first and keeps per-column dtypes.
        for row in frame.itertuples():
            lines.append("| " + " | ".join(str(cell) for cell in row) + " |")
        return "\n".join(lines)


def generate_report():
    """Build the Markdown report and collect the outputs for the UI.

    Returns:
        tuple: ``(report, matrix, duplicates)`` — the Markdown report text,
        the ``"matrix"`` entry and the ``"duplicates"`` entry of the dict
        returned by ``analyze_missing()``.
    """
    analysis = analyze_missing()

    dtype_lines = "\n".join(f"- {k}: {v}" for k, v in analysis['dtypes'].items())
    action_lines = "\n".join(analysis['actions'])
    missing_table = _frame_to_markdown(analysis['missing'])

    # The two trailing spaces on the "Shape" and "Columns & Types" lines are
    # Markdown hard line breaks and are part of the report text.
    report = "\n".join([
        "",
        "## 📋 Basic Information",
        f"**Shape:** {analysis['shape']}  ",
        "**Columns & Types:**  ",
        dtype_lines,
        "",
        "## 🔍 Missing Value Analysis",
        missing_table,
        "",
        "## 🧹 Recommended Actions",
        action_lines,
        "",
        f"## ♻️ Duplicates Found: {analysis['num_duplicates']}",
        "",
    ])
    return report, analysis['matrix'], analysis['duplicates']

# Report page: Markdown report and missing-value matrix on the left,
# duplicate rows on the right. All three are filled by `generate_report`
# when the page loads.
with gr.Blocks() as app:
    gr.Markdown("# Missing Value Analysis Report")

    with gr.Row():
        with gr.Column():
            report = gr.Markdown()
            matrix = gr.Image(label="Missing Value Patterns",
                              width=800,
                              format='png')
        with gr.Column():
            duplicates = gr.Dataframe(
                label="Duplicate Entries",
                # Read the column names directly instead of running the full
                # analysis (including figure rendering) just for the headers.
                headers=list(df.columns),
                interactive=False,
                elem_classes=["scrollable-df"]
            )

    # CSS for scrollable dataframe
    # NOTE(review): assigned as an attribute after construction — confirm the
    # installed Gradio version picks this up.
    app.css = """
    .scrollable-df {
        max-height: 500px;
        overflow-y: auto;
        display: block;
    }
    """

    app.load(generate_report, [], [report, matrix, duplicates])

app.launch()