# Importing a library that is not in Colaboratory

To import a library that's not in Colaboratory by default, you can use `!pip install` or `!apt-get install`.

In [42]:
!pip install matplotlib-venn



In [43]:
!apt-get -qq install -y libfluidsynth1

E: Package 'libfluidsynth1' has no installation candidate


# Install 7zip reader [libarchive](https://pypi.python.org/pypi/libarchive)

In [44]:
# https://pypi.python.org/pypi/libarchive
!apt-get -qq install -y libarchive-dev && pip install -U libarchive
import libarchive



# Install GraphViz & [PyDot](https://pypi.python.org/pypi/pydot)

In [45]:
# https://pypi.python.org/pypi/pydot
!apt-get -qq install -y graphviz && pip install pydot
import pydot



# Install [cartopy](http://scitools.org.uk/cartopy/docs/latest/)

In [46]:
!pip install cartopy
import cartopy



In [47]:
!pip install streamlit




In [48]:
%%writefile app.py


Overwriting app.py


In [49]:
import streamlit as st
import pandas as pd
import numpy as np
import joblib
import plotly.express as px

In [50]:
st.set_page_config(
    page_title="Global Development Decision System",
    layout="wide",
    initial_sidebar_state="expanded"
)

st.title(" Global Development Decision System")
st.markdown("""
**A Machine Learning Decision Support System.**
This tool analyzes socio-economic indicators to classify development levels,
visualize global trends, and generate actionable policy recommendations.
""")



DeltaGenerator()

In [51]:
#Robust Resource Loading (Split Caching for Stability)
@st.cache_resource
def load_model_bundle():
    """Loads the model and pipeline (Heavy resources)"""
    try:
        return joblib.load("development_cluster_pipeline_kmeans.joblib")
    except FileNotFoundError:
        st.error(" Model file missing. Upload 'development_cluster_pipeline_kmeans.joblib'.")
        st.stop()

@st.cache_data
def load_dataset():
    """Loads the dataset (Data resources)"""
    try:
        return pd.read_csv("clustered_data.csv")
    except FileNotFoundError:
        try:
            return pd.read_csv("clustered_data (1).csv")
        except FileNotFoundError:
            st.error(" Data file missing. Upload 'clustered_data.csv'.")
            st.stop()

# Initialize Resources
try:
    bundle = load_model_bundle()
    df_orig = load_dataset()
    df = df_orig.copy() # Work on a copy to prevent "Cached Object Mutated" error

    pipeline = bundle["pipeline"]
    model = bundle["model"]
    feature_cols = bundle["feature_cols"]
except Exception as e:
    st.error(f"Initialization Error: {e}")
    st.stop()

2025-12-18 06:18:04.769 No runtime found, using MemoryCacheStorageManager
2025-12-18 06:18:04.849 No runtime found, using MemoryCacheStorageManager


In [52]:

#  Auto-Labeling Logic (Poor / Developing / Developed)

def get_development_status_map(df):
    """Sort clusters by GDP to assign semantic labels."""

    # Detect cluster column
    cluster_col = next(
        (c for c in ["Cluster", "kmeans", "cluster_label"] if c in df.columns),
        None
    )
    if cluster_col is None:
        return {}, None

    # Detect wealth indicator
    wealth_col = next(
        (c for c in ["GDP", "GDP_per_capita", "Log_GDP"] if c in df.columns),
        None
    )
    if wealth_col is None:
        return {i: f"Cluster {i}" for i in df[cluster_col].unique()}, cluster_col

    # Rank clusters by average GDP
    cluster_ranking = (
        df.groupby(cluster_col)[wealth_col]
        .mean()
        .sort_values()
        .index.tolist()
    )

    # Force semantic labels
    mapping = {
        cluster_ranking[0]: "Poor Country",
        cluster_ranking[1]: "Developing Country",
        cluster_ranking[2]: "Developed Country"
    }

    return mapping, cluster_col


In [53]:
status_map, cluster_col = get_development_status_map(df)

if cluster_col:
    df["Status"] = df[cluster_col].map(status_map)
else:
    df["Status"] = "Unknown"


In [54]:
#  Sidebar: Predict Country Development

st.sidebar.header(" Predict Country Development")

input_data = {}

for col in feature_cols:
    if col in df.columns:
        min_v = float(df[col].min())
        max_v = float(df[col].max())
        mean_v = float(df[col].mean())
    else:
        min_v, max_v, mean_v = 0.0, 100.0, 50.0

    step = max((max_v - min_v) / 100, 0.01)

    input_data[col] = st.sidebar.number_input(
        col,
        min_value=min_v,
        max_value=max_v,
        value=mean_v,
        step=step
    )




In [55]:
if st.sidebar.button("Predict Status"):
    try:
        input_df = pd.DataFrame([input_data])

        # Transform input
        processed = pipeline.transform(input_df)

        # Predict cluster
        pred_id = model.predict(processed)[0]
        pred_label = status_map.get(pred_id, f"Cluster {pred_id}")

        # Confidence score
        distances = model.transform(processed)
        confidence = 1 / (1 + distances.min())

        # Display result
        if "Developed" in pred_label:
            st.sidebar.success(f" {pred_label}")
        elif "Developing" in pred_label:
            st.sidebar.warning(f" {pred_label}")
        else:
            st.sidebar.error(f"{pred_label}")

        st.sidebar.metric("Confidence Score", f"{confidence:.2f}")

        # Save for visualization
        st.session_state["new_pred"] = {
            "label": pred_label,
            "coords": processed[0][:2],
            "confidence": confidence
        }

    except Exception as e:
        st.sidebar.error(f"Prediction Error: {e}")




In [56]:

# 5. Main Dashboard (Tabs)

tab1, tab2, tab3, tab4 = st.tabs([
    "üîç Analysis & Policy",
    "üó∫Ô∏è World Map",
    "üìä Cluster Visualization",
    "üìà Feature Importance"
])


# TAB 1: Country Comparison & Policy

with tab1:
    st.subheader(" Country Development Comparison")

    if "Country" in df.columns:
        selected_country = st.selectbox(
            "Select a Country",
            sorted(df["Country"].unique())
        )

        country_row = df[df["Country"] == selected_country]

        if not country_row.empty:
            current_status = country_row["Status"].values[0]

            developed_df = df[df["Status"] == "Developed Country"]
            if developed_df.empty:
                developed_df = df

            target_avg = developed_df[feature_cols].mean()
            current_vals = country_row[feature_cols].iloc[0]

            col1, col2 = st.columns(2)
            with col1:
                st.info(f"**Current Status:** {current_status}")
            with col2:
                if "GDP" in current_vals:
                    gap = current_vals["GDP"] - target_avg["GDP"]
                    st.metric("GDP Gap vs Developed", f"${gap:,.0f}")

            comp_df = pd.DataFrame({
                "Current": current_vals,
                "Developed Avg": target_avg
            })

            norm_comp = (comp_df - comp_df.min()) / (comp_df.max() - comp_df.min() + 1e-9)
            st.bar_chart(norm_comp.head(10))

            st.subheader(" Policy Recommendations")

            HIGHER_BETTER = ["GDP", "Life", "Internet", "Phone", "Tourism"]
            LOWER_BETTER = ["Mortality", "Birth", "Tax", "Inflation"]

            recommendations = []
            for feat in feature_cols:
                val, tgt = current_vals[feat], target_avg[feat]

                if any(x in feat for x in HIGHER_BETTER) and val < tgt * 0.8:
                    recommendations.append(
                        f"**{feat}**: Increase investment to close the gap."
                    )

                if any(x in feat for x in LOWER_BETTER) and val > tgt * 1.2:
                    recommendations.append(
                        f"**{feat}**: Reduce this indicator through policy reform."
                    )

            if recommendations:
                for r in recommendations[:5]:
                    st.write(r)
            else:
                st.success(" Country meets most developed benchmarks.")



In [57]:
with tab2:
    st.subheader(" Global Development Status Map")

    if "Country" in df.columns:
        fig_map = px.choropleth(
            df,
            locations="Country",
            locationmode="country names",
            color="Status",
            hover_name="Country",
            color_discrete_map={
                "Poor Country": "#d62728",
                "Developing Country": "#ff7f0e",
                "Developed Country": "#2ca02c"
            },
            template="plotly_white",
            height=600
        )
        st.plotly_chart(fig_map, use_container_width=True)



In [58]:
with tab3:
    st.subheader(" PCA Cluster Visualization")

    X_pca = pipeline.transform(df[feature_cols])
    plot_df = pd.DataFrame(X_pca[:, :2], columns=["PC1", "PC2"])
    plot_df["Status"] = df["Status"]
    plot_df["Country"] = df["Country"] if "Country" in df.columns else df.index

    fig_pca = px.scatter(
        plot_df,
        x="PC1",
        y="PC2",
        color="Status",
        hover_name="Country",
        template="plotly_white",
        height=600
    )

    if "new_pred" in st.session_state:
        p = st.session_state["new_pred"]
        fig_pca.add_scatter(
            x=[p["coords"][0]],
            y=[p["coords"][1]],
            mode="markers+text",
            marker=dict(size=25, color="blue", symbol="star"),
            text=["NEW"],
            name=p["label"]
        )

    st.plotly_chart(fig_pca, use_container_width=True)


X has feature names, but SimpleImputer was fitted without feature names

2025-12-18 06:18:05.865 Please replace `use_container_width` with `width`.

`use_container_width` will be removed after 2025-12-31.

For `use_container_width=True`, use `width='stretch'`. For `use_container_width=False`, use `width='content'`.


In [59]:
with tab4:
    st.subheader(" Feature Importance by Development Level")

    importance = df.groupby("Status")[feature_cols].mean().T
    cols = ["Poor Country", "Developing Country", "Developed Country"]
    importance = importance[[c for c in cols if c in importance.columns]]

    st.dataframe(importance.style.background_gradient(axis=1))



In [61]:
st.markdown("---")



DeltaGenerator()

In [60]:
st.caption("¬© Global Development Decision System | ML-based Policy Support Tool")




DeltaGenerator()

In [62]:
!ls


 app.py		       development_cluster_pipeline_kmeans.joblib   sample_data
 cleaned_dataset.csv  'project_model_building_(3)_(1).ipynb'
 clustered_data.csv    requirements.txt


In [64]:
%%writefile requirements.txt
streamlit
pandas
numpy<2.0.0
scikit-learn==1.6.1
joblib
plotly


Overwriting requirements.txt


In [65]:
!ls



 app.py		       development_cluster_pipeline_kmeans.joblib   sample_data
 cleaned_dataset.csv  'project_model_building_(3)_(1).ipynb'
 clustered_data.csv    requirements.txt


In [66]:
from google.colab import files

files.download("app.py")
files.download("requirements.txt")
files.download("development_cluster_pipeline_kmeans.joblib")
files.download("clustered_data.csv")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>