<a href="https://colab.research.google.com/github/Pritam-Mondal18/Predicting_Eligibility_for_Social_Welfare_Schemes_using_Machine-_Learning/blob/main/Predicting_Eligibility_for_Social_Welfare_Schemes_using_Machine__Learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [90]:
!pip install streamlit

Collecting streamlit
  Downloading streamlit-1.48.1-py3-none-any.whl.metadata (9.5 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.48.1-py3-none-any.whl (9.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.9/9.9 MB[0m [31m61.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m94.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pydeck, streamlit
Successfully installed pydeck-0.9.1 streamlit-1.48.1


In [91]:
%%writefile app.py
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score


# ============================================================
# Streamlit UI
# ============================================================
st.set_page_config(layout="wide", page_title="NSAP ML App")
st.title("📊 NSAP Scheme Classification ML App")

# Sidebar upload widget; `uploaded_file` stays None until a CSV is provided
sidebar = st.sidebar
sidebar.header("Upload Dataset")
uploaded_file = sidebar.file_uploader("Upload CSV", type=["csv"])

TARGET_COLUMN = "schemecode"


def _add_ratio_features(frame):
    """Return a copy of ``frame`` with the derived ratio columns appended.

    A ratio is added only when every column it is built from is present.
    The ``+ 1`` in each denominator keeps it non-zero when the count is 0.
    The input frame is left untouched.
    """
    out = frame.copy()
    present = set(out.columns)
    if {"totalmale", "totalfemale"} <= present:
        out["gender_ratio"] = out["totalmale"] / (out["totalfemale"] + 1)
    if {"totalfemale", "totalbeneficiaries"} <= present:
        out["female_percentage"] = out["totalfemale"] / (out["totalbeneficiaries"] + 1)
    if {"totalsc", "totalst", "totalbeneficiaries"} <= present:
        out["scst_ratio"] = (out["totalsc"] + out["totalst"]) / (out["totalbeneficiaries"] + 1)
    return out


def _build_model(choice):
    """Return an unfitted Pipeline for the model named by ``choice``.

    Only logistic regression gets a scaling step; any name other than
    "Logistic Regression" or "Decision Tree" yields the random forest.
    """
    if choice == "Logistic Regression":
        return Pipeline([
            ("scaler", StandardScaler()),
            ("clf", LogisticRegression(max_iter=2000, solver="lbfgs", random_state=42)),
        ])
    if choice == "Decision Tree":
        return Pipeline([("clf", DecisionTreeClassifier(random_state=42))])
    return Pipeline([("clf", RandomForestClassifier(n_estimators=100, random_state=42))])


if uploaded_file is not None:
    df = pd.read_csv(uploaded_file)
    st.success("✅ Dataset Loaded Successfully!")
    st.dataframe(df.head())

    # ---------------- Preprocessing ----------------
    # Fail with a readable message instead of a KeyError traceback.
    if TARGET_COLUMN not in df.columns:
        st.error(f"❌ The uploaded CSV must contain a '{TARGET_COLUMN}' column.")
        st.stop()

    # Rows without a label cannot be used for training or evaluation.
    # Work on a derived frame so the frame displayed above is not mutated.
    data = _add_ratio_features(df.dropna(subset=[TARGET_COLUMN]))

    # Encode target
    label_encoder = LabelEncoder()
    y = label_encoder.fit_transform(data[TARGET_COLUMN])
    # The report and heatmap need string names even when scheme codes are numeric.
    class_names = [str(name) for name in label_encoder.classes_]
    class_ids = np.arange(len(class_names))
    if len(class_names) < 2:
        st.error("❌ At least two distinct scheme codes are required to train a classifier.")
        st.stop()

    # Keep only numeric features; infinities become NaN so they are imputed
    # below, and columns with no usable value at all are dropped.
    X_numeric = (
        data.drop(columns=[TARGET_COLUMN])
        .select_dtypes(include=np.number)
        .replace([np.inf, -np.inf], np.nan)
        .dropna(axis=1, how="all")
    )
    if X_numeric.shape[1] == 0:
        st.error("❌ No usable numeric feature columns were found in the dataset.")
        st.stop()

    # Train-Test Split FIRST. Stratification raises ValueError when a class is
    # too small, so fall back to a plain random split in that case.
    try:
        split = train_test_split(
            X_numeric, y, test_size=0.2, random_state=42, stratify=y
        )
    except ValueError:
        st.warning("⚠️ Some classes are too small for a stratified split; using a random split instead.")
        split = train_test_split(X_numeric, y, test_size=0.2, random_state=42)
    X_train, X_test, y_train, y_test = split

    # Impute with TRAIN medians only (no test leakage); f_classif and the
    # classifiers reject NaN. A column that is all-NaN in train falls back to 0.
    train_medians = X_train.median().fillna(0.0)
    X_train = X_train.fillna(train_medians)
    X_test = X_test.fillna(train_medians)

    # Feature selection on TRAIN only
    selector = SelectKBest(score_func=f_classif, k=min(10, X_train.shape[1]))
    selector.fit(X_train, y_train)
    selected_features = X_train.columns[selector.get_support()]

    X_train = X_train[selected_features]
    X_test = X_test[selected_features]

    st.write("🔹 Selected Features:", selected_features.tolist())

    # ---------------- Sidebar Model Selection ----------------
    st.sidebar.header("Model Selection")
    model_choice = st.sidebar.radio("Choose Model", ["Logistic Regression", "Decision Tree", "Random Forest"])
    model = _build_model(model_choice)

    # Train Model
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # ---------------- Results ----------------
    acc = accuracy_score(y_test, y_pred)
    # NOTE: the CV folds reuse features that were selected on the whole training
    # split, so this score is slightly optimistic compared with the test score.
    cv_scores = cross_val_score(model, X_train, y_train, cv=5).mean()

    st.subheader(f"📈 Model: {model_choice}")
    st.metric("Test Accuracy", f"{acc:.4f}")
    st.metric("CV Accuracy", f"{cv_scores:.4f}")

    # Classification Report
    # `labels` pins the row set to every known class so it always matches
    # `target_names`, even when a class is absent from the test split.
    st.text("Classification Report:")
    st.text(classification_report(
        y_test, y_pred, labels=class_ids, target_names=class_names, zero_division=0
    ))

    # Confusion Matrix (same fixed label set, so the ticks line up with the cells)
    cm = confusion_matrix(y_test, y_pred, labels=class_ids)
    fig, ax = plt.subplots(figsize=(6, 4))
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", cbar=False,
                xticklabels=class_names, yticklabels=class_names, ax=ax)
    ax.set_xlabel("Predicted")
    ax.set_ylabel("Actual")
    ax.set_title("Confusion Matrix")
    st.pyplot(fig)
    # The script reruns on every widget interaction; close the figure so
    # pyplot does not keep accumulating one per rerun.
    plt.close(fig)

    # ---------------- Prediction Section ----------------
    st.subheader("🔮 Try a Prediction")
    input_data = {}
    for col in selected_features:
        col_values = X_numeric[col]
        lowest = float(col_values.min())
        highest = float(col_values.max())
        # Clamp the default so float rounding of the mean can never place it
        # outside the widget's allowed range.
        default = min(max(float(col_values.mean()), lowest), highest)
        input_data[col] = st.number_input(
            f"Enter {col}",
            value=default,
            min_value=lowest,
            max_value=highest
        )

    if st.button("Predict Scheme"):
        input_df = pd.DataFrame([input_data])
        pred = model.predict(input_df[selected_features])
        pred_label = label_encoder.inverse_transform(pred)[0]
        st.success(f"✅ Predicted Scheme: **{pred_label}**")

else:
    st.warning("⬅️ Please upload a CSV file to start.")


Writing app.py


In [92]:
!pip install pyngrok

Collecting pyngrok
  Downloading pyngrok-7.3.0-py3-none-any.whl.metadata (8.1 kB)
Downloading pyngrok-7.3.0-py3-none-any.whl (25 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.3.0


In [96]:

import os
from getpass import getpass

from pyngrok import ngrok

# Never paste the token into the notebook: it would be saved with the file.
# Read it from the environment if present, otherwise prompt without echoing.
ngrok_token = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok authtoken: ")
ngrok.set_auth_token(ngrok_token)

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [94]:
from pyngrok import ngrok

# Port the Streamlit server is expected to listen on
STREAMLIT_PORT = 8501

# Open the tunnel and show the tunnel object returned by pyngrok
public_url = ngrok.connect(STREAMLIT_PORT)
print(public_url)

NgrokTunnel: "https://3a551f559135.ngrok-free.app" -> "http://localhost:8501"


In [95]:
!streamlit run app.py
# !streamlit run app.py &


Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://35.238.95.23:8501[0m
[0m
[34m  Stopping...[0m
^C
