In [1]:
%pip install streamlit plotly

Collecting streamlit
  Downloading streamlit-1.44.1-py3-none-any.whl.metadata (8.9 kB)
Collecting plotly
  Downloading plotly-6.0.1-py3-none-any.whl.metadata (6.7 kB)
Collecting altair<6,>=4.0 (from streamlit)
  Downloading altair-5.5.0-py3-none-any.whl.metadata (11 kB)
Collecting blinker<2,>=1.0.0 (from streamlit)
  Downloading blinker-1.9.0-py3-none-any.whl.metadata (1.6 kB)
Collecting cachetools<6,>=4.0 (from streamlit)
  Downloading cachetools-5.5.2-py3-none-any.whl.metadata (5.4 kB)
Collecting tenacity<10,>=8.1.0 (from streamlit)
  Downloading tenacity-9.1.2-py3-none-any.whl.metadata (1.2 kB)
Collecting toml<2,>=0.10.1 (from streamlit)
  Downloading toml-0.10.2-py2.py3-none-any.whl.metadata (7.1 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Collecting narwhals>=1.15.1 (from plotly)
  Downloading narwhals-1.35.0-py3-none-any.whl.metadata (9.2 kB)
Collecting jsonschema>=3.0 (from altair<6,>=4.0->streamlit)
  Usin

In [3]:
import streamlit as st
import numpy as np
import pandas as pd

# Sidebar
st.sidebar.header("Sidebar Controls")
# Sidebar widgets that control what is shown in the two columns below.
number = st.sidebar.number_input(
    "Select number of data points", min_value=10, max_value=100, value=50
)
option = st.sidebar.selectbox("Choose your chart type", ("Line Chart", "Bar Chart"))
st.sidebar.write("You selected:", option)

# Main content
st.title("Streamlit Demo App")

# Using container
with st.container():
    st.header("Interactive Data Visualization")
    st.write(
        "The chart in Column 1 and the data in Column 2 change based on sidebar selections."
    )

# Generate a random walk with `number` points.
# The generator is seeded because Streamlit re-executes the whole script on every
# widget interaction: with an unseeded generator, merely ticking "Preview data"
# would replace the series that is being charted.
rng = np.random.default_rng(42)
data = pd.DataFrame({"x": range(number), "y": rng.standard_normal(number).cumsum()})

# Columns for displaying charts and data based on sidebar inputs
col1, col2 = st.columns(2)

with col1:
    st.header("Column 1: Visualization")
    if option == "Line Chart":
        # Plot y against x; passing the whole frame would also draw the "x"
        # column as a second series, unlike the bar chart branch below.
        st.line_chart(data, x="x", y="y")
    elif option == "Bar Chart":
        st.bar_chart(data["y"])

with col2:
    st.header("Column 2: Data Preview")
    # Checkbox to select whether to preview the data
    if st.checkbox("Preview data", key="preview_data"):
        # Display the DataFrame without the index. reset_index(drop=True) only
        # rebuilds a default index, which is still rendered, so hide it explicitly.
        st.dataframe(data, hide_index=True)

# Explanatory text shown inside the collapsible section at the bottom of the page.
details_text = """
        This example demonstrates how inputs from the sidebar can dynamically affect the content 
        within the app. Changing the number of data points or the chart type in the sidebar 
        updates the visualization and data preview in real-time.
    """

# Expander for additional details
with st.expander("See more details"):
    st.write(details_text)



In [2]:
import streamlit as st
import pandas as pd
import numpy as np
from sklearn.datasets import load_iris
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Page configuration: use the wide layout, then render the page title.
# NOTE(review): Streamlit has historically required set_page_config to be the first
# st.* call of a script — keep it ahead of st.title; confirm for the installed version.
st.set_page_config(layout="wide")
st.title("Iris Dataset Analysis")


# Load and prepare data
@st.cache_data
def load_data():
    """Load the Iris dataset into a labelled DataFrame.

    Returns:
        tuple: ``(frame, feature_columns)``. ``frame`` has one column per entry of
        ``load_iris().feature_names`` plus a categorical ``"Species"`` column built
        from the target codes and target names; ``feature_columns`` is that same
        ``feature_names`` value.
    """
    bunch = load_iris()
    feature_columns = bunch.feature_names
    # Map the integer target codes onto their species names as a categorical.
    species = pd.Categorical.from_codes(bunch.target, bunch.target_names)
    frame = pd.DataFrame(bunch.data, columns=feature_columns).assign(Species=species)
    return frame, feature_columns


# Fetch the dataset and standardise the feature columns with StandardScaler;
# X is the raw feature matrix, X_scaled its standardised counterpart.
df, feature_names = load_data()
X = df[feature_names].to_numpy()
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

# Sidebar controls
with st.sidebar:
    st.header("Analysis Controls")
    st.write("Create a slider here")

# 1. Feature Distribution Analysis
st.header("1. Feature Distributions by Species")

# Colors for species
colors = {"setosa": "#FF4B4B", "versicolor": "#4B4BFF", "virginica": "#4BFF4B"}

# Feature selection for box plot, placed in the middle column of a 1:3:1 layout.
col = st.columns([1, 3, 1])[1]
with col:
    selected_feature = st.selectbox("Select Feature for Box Plot:", feature_names)

# Shared pieces of the figure: its title and the sorted species order
# (the latter fixes the category/legend ordering).
box_title = f"Distribution of {selected_feature} by Species"
species_order = sorted(df["Species"].unique())

# Create box plot for selected feature
fig_box = px.box(
    df,
    x="Species",
    y=selected_feature,
    color="Species",
    color_discrete_map=colors,
    title=box_title,
    labels={selected_feature: selected_feature, "Species": "Species"},
    category_orders={"Species": species_order},
)
fig_box.update_layout(title=box_title, yaxis_title=selected_feature, showlegend=True)

# Render the figure in the middle column of a second 1:3:1 row.
col = st.columns([1, 3, 1])[1]
with col:
    st.plotly_chart(fig_box)

# 2. Feature Relationships
st.header("2. Feature Relationships")

# TODO: placeholder text only — no scatter matrix is drawn yet.
st.write("Draw a scatter matrix plot here")


# 3. Feature Correlations
st.header("3. Feature Correlations")
correlation = df[feature_names].corr()

# Heatmap trace: colour range pinned to [-1, 1], with each cell annotated by its
# correlation rounded to two decimals.
corr_heatmap = go.Heatmap(
    z=correlation,
    x=feature_names,
    y=feature_names,
    colorscale="RdBu",
    zmin=-1,
    zmax=1,
    text=correlation.round(2),
    texttemplate="%{text}",
    textfont={"size": 12},
    hoverongaps=False,
)

# Create correlation heatmap
fig_corr = go.Figure(data=corr_heatmap)
fig_corr.update_layout(title="Feature Correlation Matrix")

# Show the heatmap in the middle column of a 1:2:1 layout.
col = st.columns([1, 2, 1])[1]
with col:
    st.plotly_chart(fig_corr)

# 4. Elbow Analysis
st.header("4. Elbow Analysis")


@st.cache_data
def perform_elbow_analysis(X, max_clusters=10):
    """Fit K-Means for every k from 1 to ``max_clusters`` and collect the inertias.

    Args:
        X: Data passed unchanged to ``KMeans.fit``.
        max_clusters: Largest number of clusters to try (inclusive).

    Returns:
        list: ``inertia_`` of each fitted model, ordered by k = 1, 2, ..., max_clusters.
    """
    # random_state is fixed so that every k is fitted reproducibly.
    return [
        KMeans(n_clusters=k, random_state=42).fit(X).inertia_
        for k in range(1, max_clusters + 1)
    ]


# Inertia per cluster count on the standardised features.
# TODO: `inertias` is computed but not plotted yet — only placeholder text is shown.
inertias = perform_elbow_analysis(X_scaled)

st.write("Draw a line chart here")

# 5. Clustering Analysis
st.header("5. Clustering Analysis")

# Perform clustering: fit K-Means and store each row's cluster id on df as a string.
clusters = 3
kmeans = KMeans(n_clusters=clusters, random_state=42).fit(X_scaled)
cluster_labels = kmeans.labels_
df["Cluster"] = cluster_labels.astype(str)

# Create comparison plots (both panels currently hold placeholder text only).
col1, col2 = st.columns(2)

for column, heading in ((col1, "Clustering Result"), (col2, "Actual Species")):
    with column:
        st.subheader(heading)
        st.write("Draw a scatter plot here")


# 6. Clustering Performance Analysis
st.header("6. Clustering Performance")
# Cross-tabulate species (rows) against cluster labels (columns), with totals.
confusion_df = pd.crosstab(index=df["Species"], columns=df["Cluster"], margins=True)
st.write("Confusion Matrix (Species vs Clusters):")
st.write(confusion_df)

# 7. Additional Statistics
st.header("7. Feature Statistics")


def _mean_std_by(group_column):
    """Return the per-group mean and std of every feature column, rounded to 2 decimals."""
    grouped = df.groupby(group_column, observed=True)[feature_names]
    return grouped.agg(["mean", "std"]).round(2)


col3, col4 = st.columns(2)

with col3:
    st.subheader("Statistics by Species")
    species_stats = _mean_std_by("Species")
    st.write(species_stats)

with col4:
    st.subheader("Statistics by Cluster")
    cluster_stats = _mean_std_by("Cluster")
    st.write(cluster_stats)

2025-04-15 17:16:19.104 
  command:

    streamlit run /Users/k.chanatip/Desktop/CP49/Year-3/Data-Sci-Data-En/.venv/lib/python3.13/site-packages/ipykernel_launcher.py [ARGUMENTS]
2025-04-15 17:16:19.105 No runtime found, using MemoryCacheStorageManager
2025-04-15 17:16:19.105 No runtime found, using MemoryCacheStorageManager
2025-04-15 17:16:19.115 Session state does not function when running a script without `streamlit run`
2025-04-15 17:16:19.202 No runtime found, using MemoryCacheStorageManager
2025-04-15 17:16:19.203 No runtime found, using MemoryCacheStorageManager
