In [1]:
!pip install -q streamlit pyngrok shap xgboost transformers scikit-learn pandas


In [5]:
%%writefile app.py
import streamlit as st
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import accuracy_score, mean_squared_error
from xgboost import XGBClassifier, XGBRegressor
import requests
import os
import time

# Hugging Face API token.
# Use a token that is already present in the environment if there is one;
# only fall back to the placeholder (which must be replaced with a real token)
# when HF_TOKEN is not set, so a real exported token is never overwritten.
os.environ.setdefault("HF_TOKEN", "REPLACE WITH YOURS")

def ask_llm(prompt, timeout=60):
    """Send ``prompt`` to the hosted Zephyr-7B model and return its reply.

    Args:
        prompt: Text sent as the ``inputs`` field of the Inference API request.
        timeout: Seconds to wait for the HTTP request before giving up, so a
            stalled endpoint cannot hang the app indefinitely.

    Returns:
        The generated text on success. On any failure (missing token, network
        error, non-200 status, unexpected response body) a human-readable
        string starting with ``"Error:"`` is returned instead of raising, so
        the caller can always render the result.
    """
    api_url = "https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta"

    token = os.environ.get("HF_TOKEN")
    if not token:
        return "Error: HF_TOKEN environment variable is not set"

    headers = {"Authorization": f"Bearer {token}"}
    payload = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": 300,
            "temperature": 0.7,
            # Without this the API echoes the prompt in front of the answer.
            "return_full_text": False,
        },
    }

    try:
        response = requests.post(api_url, headers=headers, json=payload, timeout=timeout)
    except requests.RequestException as exc:
        # Connection problems / timeouts should surface as a message, not a crash.
        return f"Error: request failed - {exc}"

    if response.status_code != 200:
        return f"Error: {response.status_code} - {response.text}"

    try:
        return response.json()[0]["generated_text"]
    except (ValueError, LookupError, TypeError):
        # Body was not JSON, or not the expected [{"generated_text": ...}] shape.
        return f"Error: unexpected response format - {response.text}"

st.title("Colab Streamlit: Train Model and Explain Predictions")

# File upload: nothing below runs until the user supplies a CSV.
uploaded = st.file_uploader("Upload a CSV dataset", type="csv")
if uploaded is not None:
    df = pd.read_csv(uploaded)
    st.write("Dataset preview:", df.head())

    target = st.selectbox("Select target column", df.columns)
    target_type = st.radio("Is the target categorical or numerical?", ("Numerical", "Categorical"))

    # Label-encode every non-numeric feature column. Testing with
    # is_numeric_dtype (instead of `dtype == object`) also catches text columns
    # that pandas stores with a dedicated string dtype rather than `object`.
    # Missing values in these columns become the literal category "nan".
    df_clean = df.copy()
    for col in df_clean.columns:
        if col != target and not pd.api.types.is_numeric_dtype(df_clean[col]):
            df_clean[col] = LabelEncoder().fit_transform(df_clean[col].astype(str))
    # Remaining NaNs (numeric features or the target) drop the whole row.
    df_clean = df_clean.dropna()
    X = df_clean.drop(columns=[target])
    y = df_clean[target]

    if target_type == "Categorical":
        # Classifiers below expect integer class labels 0..n_classes-1.
        le_target = LabelEncoder()
        y = le_target.fit_transform(y.astype(str))

    # Choose model
    if target_type == "Numerical":
        model_name = st.selectbox("Choose model", ("Decision Tree Regressor", "Linear Regression", "XGBoost Regressor"))
    else:
        model_name = st.selectbox("Choose model", ("Decision Tree Classifier", "XGBoost Classifier"))

    # Train model
    if st.button("Train Model"):
        if X.shape[0] == 0 or X.shape[1] == 0:
            # Fitting on an empty frame would raise an opaque library error.
            st.error("Nothing to train on: no rows or feature columns remain after removing missing values.")
        elif target_type == "Numerical" and not pd.api.types.is_numeric_dtype(y):
            # A text target cannot be regressed on; tell the user how to proceed.
            st.error(f"Target column '{target}' is not numeric. Select \"Categorical\" or choose a different target column.")
        else:
            if target_type == "Numerical":
                if model_name == "Decision Tree Regressor":
                    model = DecisionTreeRegressor(max_depth=5)
                elif model_name == "Linear Regression":
                    model = LinearRegression()
                else:
                    model = XGBRegressor(objective='reg:squarederror', n_estimators=100, verbosity=0)
            else:
                if model_name == "Decision Tree Classifier":
                    model = DecisionTreeClassifier(max_depth=5)
                else:
                    # `use_label_encoder` is deprecated/removed in XGBoost, and a fixed
                    # 'logloss' metric is the binary one; the library default picks the
                    # metric matching the (binary or multi-class) objective.
                    model = XGBClassifier(n_estimators=100, verbosity=0)

            model.fit(X, y)
            st.success(f"Trained {model_name} on the data.")

            # Metrics are computed on the training data itself (no hold-out split),
            # so they are optimistic; the labels below say so explicitly.
            preds = model.predict(X)
            if target_type == "Numerical":
                mse = mean_squared_error(y, preds)
                st.write(f"Mean Squared Error on training set: {mse:.3f}")
            else:
                acc = accuracy_score(y, preds)
                st.write(f"Accuracy on training set: {acc:.2f}")

            # Feature importance (only for tree-based models)
            st.subheader("Top Features Based on Model Importance")
            # Probe only the attribute: a broad `except AttributeError` around the
            # whole section would mislabel unrelated failures as "no importances".
            importances = getattr(model, "feature_importances_", None)
            if importances is None:
                st.warning("This model does not provide feature importances. Try a tree-based model like XGBoost or Decision Tree.")
            else:
                feat_df = pd.DataFrame({
                    'Feature': X.columns,
                    'Importance': importances
                }).sort_values(by='Importance', ascending=False)
                top_features = feat_df.head(5)

                st.dataframe(top_features, use_container_width=True)

                # Generate prompt for LLM
                explanation_prompt = "Explain what the following top features might mean in terms of their influence on a machine learning model's prediction target. Use simple language:\n\n"
                for _, row in top_features.iterrows():
                    explanation_prompt += f"- {row['Feature']} (Importance: {row['Importance']:.4f})\n"
                explanation_prompt += "\nBe clear and intuitive."

                with st.spinner("Generating interpretation using Mistral..."):
                    result = ask_llm(explanation_prompt)

                st.subheader("AI-Generated Interpretation")
                st.markdown(result)


Overwriting app.py


In [6]:
from pyngrok import ngrok, conf
import os, time
import socket

STREAMLIT_PORT = 8501
STARTUP_TIMEOUT_S = 30

# Configure Ngrok: take the token from the NGROK_AUTHTOKEN environment variable
# when it is set, so the secret does not have to be pasted into the notebook;
# otherwise fall back to the placeholder, which must be replaced.
conf.get_default().auth_token = os.environ.get("NGROK_AUTHTOKEN", "REPLACE WITH YOURS")

# Kill previous sessions, then pause briefly so the old process can release the port.
os.system("pkill streamlit")
time.sleep(1)

# Launch Streamlit in the background; its output goes to log.txt.
os.system(f"nohup streamlit run app.py --server.port {STREAMLIT_PORT} > log.txt 2>&1 &")

# The launch above returns immediately, so poll until the server accepts
# connections; otherwise the public URL can be printed before the app is up.
deadline = time.monotonic() + STARTUP_TIMEOUT_S
while time.monotonic() < deadline:
    try:
        with socket.create_connection(("127.0.0.1", STREAMLIT_PORT), timeout=1):
            break
    except OSError:
        time.sleep(0.5)
else:
    print(f"Warning: Streamlit is not listening on port {STREAMLIT_PORT} after {STARTUP_TIMEOUT_S}s; check log.txt")

# Open ngrok tunnel
public_url = ngrok.connect(STREAMLIT_PORT)
print("Streamlit app is live at:", public_url)


Streamlit app is live at: NgrokTunnel: "https://5eb6-35-193-23-63.ngrok-free.app" -> "http://localhost:8501"
