<a href="https://colab.research.google.com/github/Mainabryan/Student-life-segmentation/blob/main/steamlit_for_student_life_segementation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
pip install streamlit


Collecting streamlit
  Downloading streamlit-1.45.1-py3-none-any.whl.metadata (8.9 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.45.1-py3-none-any.whl (9.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.9/9.9 MB[0m [31m47.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m63.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.1/79.1 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[?25hInst

In [2]:
# student_clustering_app.py

import streamlit as st
import pandas as pdimport numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans

# ---------- Custom Page Setup ----------
st.set_page_config(page_title="Student Lifestyle Clustering", layout="wide")

# ---------- App Title ----------
st.markdown("<h1 style='text-align: center; color: #6C63FF;'>🎓 Student Lifestyle Clustering App</h1>", unsafe_allow_html=True)
st.markdown("<h4 style='text-align: center; color: grey;'>Cluster students based on lifestyle habits for smarter insights</h4>", unsafe_allow_html=True)
st.markdown("---")

# ---------- File Upload ----------
uploaded_file = st.file_uploader("📁 Upload your student dataset (.csv)", type="csv")

if uploaded_file is not None:
    data = pd.read_csv(uploaded_file)
    st.success("✅ Data uploaded successfully!")
    st.write("**📄 Preview of your data:**")
    st.dataframe(data.head(), use_container_width=True)

    # ---------- Feature Selection ----------
    features = ['Age', 'Gender', 'Study Hours per Week',
                'Exercise Frequency (per week)', 'Sleep Hours per Night',
                'Cafeteria Spend ($/week)', 'Social Activity Score (1–100)']
    X = data[features]

    # ---------- Scaling ----------
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # ---------- Elbow Method ----------
    inertia = []
    for k in range(1, 11):
        kmeans = KMeans(n_clusters=k, random_state=42)
        kmeans.fit(X_scaled)
        inertia.append(kmeans.inertia_)

    st.markdown("---")
    st.subheader("🔍 Determine the Best Number of Clusters (Elbow Method)")
    fig_elbow = plt.figure(figsize=(8, 5))
    plt.plot(range(1, 11), inertia, marker='o', linestyle='--', color='#6C63FF')
    plt.xlabel("Number of Clusters (k)")
    plt.ylabel("Inertia")
    plt.title("Elbow Curve for Optimal k")
    st.pyplot(fig_elbow)

    # ---------- Select k ----------
    st.markdown("### 📌 Choose the number of clusters")
    k = st.slider("Select number of clusters (k)", min_value=2, max_value=10, value=4)

    # ---------- Final Clustering ----------
    kmeans = KMeans(n_clusters=k, random_state=42)
    labels = kmeans.fit_predict(X_scaled)
    data['Cluster'] = labels

    # ---------- Cluster Visualization ----------
    st.markdown("---")
    st.subheader("📊 Visualize Student Clusters")
    col1, col2 = st.columns(2)
    with col1:
        x_axis = st.selectbox("🧭 Select X-axis Feature", features, index=2)
    with col2:
        y_axis = st.selectbox("🧭 Select Y-axis Feature", features, index=6)

    fig_cluster = plt.figure(figsize=(8, 6))
    plt.scatter(data[x_axis], data[y_axis], c=data['Cluster'], cmap='tab10', s=60)
    plt.xlabel(x_axis)
    plt.ylabel(y_axis)
    plt.title("Cluster Distribution")
    plt.colorbar(label="Cluster")
    st.pyplot(fig_cluster)

    # ---------- Cluster Summary ----------
    st.markdown("---")
    st.subheader("📈 Cluster Summary Statistics")
    st.dataframe(data.groupby('Cluster')[features].mean().round(2), use_container_width=True)

    # ---------- Footer ----------
    st.markdown("---")
    st.markdown("<p style='text-align: center; color: grey;'>🚀 Built by Bryan Waweru | Streamlit Clustering Project</p>", unsafe_allow_html=True)

else:
    st.warning("📌 Please upload a CSV file to get started.")


2025-06-14 06:30:01.296 
  command:

    streamlit run /usr/local/lib/python3.11/dist-packages/colab_kernel_launcher.py [ARGUMENTS]


In [3]:
%%writefile app.py

import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans

st.set_page_config(page_title="Student Lifestyle Clustering", layout="wide")
st.title("🎓 Student Lifestyle Clustering App")

uploaded_file = st.file_uploader("📁 Upload your student dataset (.csv)", type="csv")

if uploaded_file is not None:
    data = pd.read_csv(uploaded_file)
    st.success("✅ Data uploaded successfully!")
    st.dataframe(data.head())

    features = ['Age', 'Gender', 'Study Hours per Week',
                'Exercise Frequency (per week)', 'Sleep Hours per Night',
                'Cafeteria Spend ($/week)', 'Social Activity Score (1–100)']
    X = data[features]

    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    inertia = []
    for k in range(1, 11):
        kmeans = KMeans(n_clusters=k, random_state=42)
        kmeans.fit(X_scaled)
        inertia.append(kmeans.inertia_)

    st.subheader("🔍 Elbow Method to Choose k")
    fig_elbow = plt.figure()
    plt.plot(range(1, 11), inertia, marker='o', linestyle='--', color='teal')
    plt.xlabel("Number of Clusters (k)")
    plt.ylabel("Inertia")
    plt.title("Elbow Method Plot")
    st.pyplot(fig_elbow)

    k = st.slider("Select number of clusters", 2, 10, 4)
    kmeans = KMeans(n_clusters=k, random_state=42)
    labels = kmeans.fit_predict(X_scaled)
    data['Cluster'] = labels

    x_axis = st.selectbox("X-axis Feature", features, index=2)
    y_axis = st.selectbox("Y-axis Feature", features, index=6)

    st.subheader("📊 Cluster Visualization")
    fig_cluster = plt.figure()
    plt.scatter(data[x_axis], data[y_axis], c=data['Cluster'], cmap='tab10', s=60)
    plt.xlabel(x_axis)
    plt.ylabel(y_axis)
    plt.title("Cluster Visualization")
    plt.colorbar(label="Cluster")
    st.pyplot(fig_cluster)

    st.subheader("📈 Cluster Summary Stats")
    st.dataframe(data.groupby("Cluster")[features].mean().round(2))
else:
    st.warning("📌 Please upload a CSV file to continue.")


Writing app.py


In [4]:
!pip install streamlit localtunnel -q

# Show your public IP (for debugging)
!wget -q -O - ipv4.icanhazip.com

# Start Streamlit app and expose with localtunnel
!streamlit run app.py & npx localtunnel --port 8501

[31mERROR: Could not find a version that satisfies the requirement localtunnel (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for localtunnel[0m[31m
[0m34.71.26.51

Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[1G[0K⠙[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://34.71.26.51:8501[0m
[0m
[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K[1G[0JNeed to install the following packages:
localtunnel@2.0.2
Ok to proceed? (y) [20Gmaishayaufala

[1G[0K⠙[1G[0K[1mnpm[22m [31merror[39m canceled
[1G[0K⠙[1G[0K[1mnpm[22m [31merror[39m A complete log of this run can be found in: /root/.npm/_logs/2025-06-14T06_39_23_339Z-debug-0.log
[1G[0K⠙[1G[0K[34m  Stopping...[0m
