<a href="https://colab.research.google.com/github/FrankPerera04/DSPL_Individual_CW/blob/main/Developing%20Streamlit/App_Versions.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Version 1

In [None]:
# Write the app
%%writefile app.py
import streamlit as st

# Set page config IMMEDIATELY after importing streamlit
st.set_page_config(page_title="Accommodation Dashboard", layout="wide")

import pandas as pd
import plotly.express as px

# Load the processed accommodation dataset (result memoised by st.cache_data)
@st.cache_data
def load_data():
    """Read the processed accommodation CSV hosted in the project's GitHub repository.

    Returns:
        The DataFrame produced by ``pd.read_csv`` for the hosted CSV file.
    """
    # Point this at another URL or a local path to use a different copy of the data
    csv_url = "https://raw.githubusercontent.com/FrankPerera04/DSPL_Individual_CW/refs/heads/main/processed_accommodation_data%20(1).csv"
    return pd.read_csv(csv_url)


df = load_data()

# Page heading
st.title("🏨 Accommodation Insights Dashboard")

# Headline figures for the complete (unfiltered) dataset, one metric per column
card_columns = st.columns(4)
card_values = (
    ("Total Records", len(df)),
    ("Unique Districts", df["District"].nunique()),
    ("Accommodation Types", df["Type"].nunique()),
    ("Total Rooms", int(df["Rooms"].sum())),
)
for card, (label, value) in zip(card_columns, card_values):
    card.metric(label, value)

# Sidebar filters: every district and type is pre-selected, so nothing is hidden initially
district_options = df["District"].unique()
type_options = df["Type"].unique()

with st.sidebar:
    st.header("🔍 Filters")
    selected_districts = st.multiselect("Select District(s)", district_options, default=district_options)
    selected_types = st.multiselect("Select Accommodation Type(s)", type_options, default=type_options)

# Keep only the rows that match both sidebar selections
district_mask = df["District"].isin(selected_districts)
type_mask = df["Type"].isin(selected_types)
filtered_df = df[district_mask & type_mask]

# Bar chart: how many filtered accommodations fall in each district
st.subheader("🏙️ Number of Accommodations per District")
district_count = (
    filtered_df["District"]
    .value_counts()
    .rename_axis("District")
    .reset_index(name="Count")
)
st.plotly_chart(
    px.bar(district_count, x="District", y="Count", color="District", text="Count", template="plotly_dark"),
    use_container_width=True,
)

# Donut chart: share of each accommodation type among the filtered rows
st.subheader("🏘️ Distribution by Accommodation Type")
type_count = (
    filtered_df["Type"]
    .value_counts()
    .rename_axis("Type")
    .reset_index(name="Count")
)
st.plotly_chart(
    px.pie(type_count, names="Type", values="Count", hole=0.4, template="plotly_dark"),
    use_container_width=True,
)

# Bar chart: summed room count per accommodation type, largest first
st.subheader("🛏️ Total Number of Rooms per Accommodation Type")
room_totals = filtered_df.groupby("Type")["Rooms"].sum()
rooms_by_type = room_totals.reset_index().sort_values(by="Rooms", ascending=False)
rooms_chart = px.bar(
    rooms_by_type,
    x="Type",
    y="Rooms",
    color="Rooms",
    text="Rooms",
    template="plotly_dark",
    labels={"Rooms": "Total Rooms", "Type": "Accommodation Type"},
)
# Explicit axis titles and slanted tick labels
rooms_chart.update_layout(xaxis_title="Accommodation Type", yaxis_title="Total Rooms", xaxis_tickangle=-45)
st.plotly_chart(rooms_chart, use_container_width=True)

# Map: Locations of accommodations
st.subheader("🗺️ Accommodation Locations Map")
# "Logitiute" is the longitude column as it is spelled in the dataset; st.map
# needs latitude/longitude column names, and it rejects null coordinates, so
# rows without a usable position are left off the map instead of crashing it.
map_points = (
    filtered_df[["Latitude", "Logitiute"]]
    .rename(columns={"Latitude": "latitude", "Logitiute": "longitude"})
    .dropna(subset=["latitude", "longitude"])
)
st.map(map_points)

# Data table: Show filtered data (all columns, including rows without coordinates)
st.subheader("📋 Filtered Accommodation Data")
st.dataframe(filtered_df, use_container_width=True)


## Version 2

In [None]:
# Write the app
%%writefile app.py
import streamlit as st

# Set page config IMMEDIATELY after importing streamlit
st.set_page_config(page_title="Accommodation Dashboard", layout="wide")

import pandas as pd
import plotly.express as px

# Load the processed accommodation dataset (result memoised by st.cache_data)
@st.cache_data
def load_data():
    """Read the processed accommodation CSV hosted in the project's GitHub repository.

    Returns:
        The DataFrame produced by ``pd.read_csv`` for the hosted CSV file.
    """
    # Point this at another URL or a local path to use a different copy of the data
    csv_url = "https://raw.githubusercontent.com/FrankPerera04/DSPL_Individual_CW/refs/heads/main/processed_accommodation_data%20(1).csv"
    return pd.read_csv(csv_url)


df = load_data()

# Page heading
st.title("🏨 Accommodation Insights Dashboard")

# Headline figures for the complete (unfiltered) dataset, one metric per column
card_columns = st.columns(4)
card_values = (
    ("Total Records", len(df)),
    ("Unique Districts", df["District"].nunique()),
    ("Accommodation Types", df["Type"].nunique()),
    ("Total Rooms", int(df["Rooms"].sum())),
)
for card, (label, value) in zip(card_columns, card_values):
    card.metric(label, value)

# Sidebar filters: every district and type is pre-selected, so nothing is hidden initially
district_options = df["District"].unique()
type_options = df["Type"].unique()

with st.sidebar:
    st.header("🔍 Filters")
    selected_districts = st.multiselect("Select District(s)", district_options, default=district_options)
    selected_types = st.multiselect("Select Accommodation Type(s)", type_options, default=type_options)

# Keep only the rows that match both sidebar selections
district_mask = df["District"].isin(selected_districts)
type_mask = df["Type"].isin(selected_types)
filtered_df = df[district_mask & type_mask]


# Accommodation capacity per district: property count, room total and mean rooms per property
st.subheader("🏙️ Accommodation Capacity by District")

capacity_df = (
    filtered_df.groupby("District")
    .agg(
        Total_Accommodations=("Name", "count"),
        Total_Rooms=("Rooms", "sum"),
        Average_Rooms_Per_Accommodation=("Rooms", "mean"),
    )
    .reset_index()
    .sort_values(by="Total_Rooms", ascending=False)
)

# One decimal place is enough for the average
capacity_df["Average_Rooms_Per_Accommodation"] = capacity_df["Average_Rooms_Per_Accommodation"].round(1)

# Tabular view of the aggregated figures
st.dataframe(capacity_df, use_container_width=True)

# Same room totals as a bar chart, in the table's (descending) order
st.subheader("🏢 Total Rooms by District")
capacity_chart = px.bar(
    capacity_df,
    x="District",
    y="Total_Rooms",
    text="Total_Rooms",
    color="Total_Rooms",
    template="plotly_dark",
    labels={"Total_Rooms": "Total Rooms", "District": "District"},
)
capacity_chart.update_layout(xaxis_title="District", yaxis_title="Total Number of Rooms", xaxis_tickangle=-45)
st.plotly_chart(capacity_chart, use_container_width=True)



# Bar chart: how many filtered accommodations fall in each district
st.subheader("🏙️ Number of Accommodations per District")
district_count = (
    filtered_df["District"]
    .value_counts()
    .rename_axis("District")
    .reset_index(name="Count")
)
st.plotly_chart(
    px.bar(district_count, x="District", y="Count", color="District", text="Count", template="plotly_dark"),
    use_container_width=True,
)

# Donut chart: share of each accommodation type among the filtered rows
st.subheader("🏘️ Distribution by Accommodation Type")
type_count = (
    filtered_df["Type"]
    .value_counts()
    .rename_axis("Type")
    .reset_index(name="Count")
)
st.plotly_chart(
    px.pie(type_count, names="Type", values="Count", hole=0.4, template="plotly_dark"),
    use_container_width=True,
)

# Bar chart: summed room count per accommodation type, largest first
st.subheader("🛏️ Total Number of Rooms per Accommodation Type")
room_totals = filtered_df.groupby("Type")["Rooms"].sum()
rooms_by_type = room_totals.reset_index().sort_values(by="Rooms", ascending=False)
rooms_chart = px.bar(
    rooms_by_type,
    x="Type",
    y="Rooms",
    color="Rooms",
    text="Rooms",
    template="plotly_dark",
    labels={"Rooms": "Total Rooms", "Type": "Accommodation Type"},
)
# Explicit axis titles and slanted tick labels
rooms_chart.update_layout(xaxis_title="Accommodation Type", yaxis_title="Total Rooms", xaxis_tickangle=-45)
st.plotly_chart(rooms_chart, use_container_width=True)

# Map: Locations of accommodations
st.subheader("🗺️ Accommodation Locations Map")
# "Logitiute" is the longitude column as it is spelled in the dataset; st.map
# needs latitude/longitude column names, and it rejects null coordinates, so
# rows without a usable position are left off the map instead of crashing it.
map_points = (
    filtered_df[["Latitude", "Logitiute"]]
    .rename(columns={"Latitude": "latitude", "Logitiute": "longitude"})
    .dropna(subset=["latitude", "longitude"])
)
st.map(map_points)

# Data table: Show filtered data (all columns, including rows without coordinates)
st.subheader("📋 Filtered Accommodation Data")
st.dataframe(filtered_df, use_container_width=True)


## Version 3


In [None]:
import streamlit as st

# Set page config IMMEDIATELY after importing streamlit
st.set_page_config(page_title="Accommodation Dashboard", layout="wide")

import pandas as pd
import plotly.express as px

# Load the processed accommodation dataset (result memoised by st.cache_data)
@st.cache_data
def load_data():
    """Read the processed accommodation CSV hosted in the project's GitHub repository.

    Returns:
        The DataFrame produced by ``pd.read_csv`` for the hosted CSV file.
    """
    # Point this at another URL or a local path to use a different copy of the data
    csv_url = "https://raw.githubusercontent.com/FrankPerera04/DSPL_Individual_CW/refs/heads/main/processed_accommodation_data%20(1).csv"
    return pd.read_csv(csv_url)


df = load_data()

# Page heading
st.title("🏢 Accommodation Insights Dashboard")

# Headline figures for the complete (unfiltered) dataset, one metric per column
card_columns = st.columns(4)
card_values = (
    ("Total Records", len(df)),
    ("Unique Districts", df["District"].nunique()),
    ("Accommodation Types", df["Type"].nunique()),
    ("Total Rooms", int(df["Rooms"].sum())),
)
for card, (label, value) in zip(card_columns, card_values):
    card.metric(label, value)

# Sidebar filters: every district and type is pre-selected, so nothing is hidden initially
district_options = df["District"].unique()
type_options = df["Type"].unique()

with st.sidebar:
    st.header("🔍 Filters")
    selected_districts = st.multiselect("Select District(s)", district_options, default=district_options)
    selected_types = st.multiselect("Select Accommodation Type(s)", type_options, default=type_options)

# Keep only the rows that match both sidebar selections
district_mask = df["District"].isin(selected_districts)
type_mask = df["Type"].isin(selected_types)
filtered_df = df[district_mask & type_mask]

# Per-district metrics shared by the three numbered charts
st.subheader("Accommodation Metrics by District")

# One row per district: property count, room total, and mean rooms per property (1 decimal)
metrics_df = (
    filtered_df.groupby("District")
    .agg(
        Total_Accommodations=("Name", "count"),
        Total_Rooms=("Rooms", "sum"),
        Average_Rooms_Per_Property=("Rooms", "mean"),
    )
    .reset_index()
)
metrics_df["Average_Rooms_Per_Property"] = metrics_df["Average_Rooms_Per_Property"].round(1)

# 1. Accommodation count per district, districts ordered from most to fewest
st.subheader("1 - Total Number of Accommodations per District")

metrics_df_sorted = metrics_df.sort_values(by="Total_Accommodations", ascending=False)

acc_chart = px.bar(
    metrics_df_sorted,
    x="District",
    y="Total_Accommodations",
    color="Total_Accommodations",
    text="Total_Accommodations",
    template="plotly_dark",
)
acc_chart.update_layout(xaxis_title="District", yaxis_title="Number of Accommodations", xaxis_tickangle=-45)
st.plotly_chart(acc_chart, use_container_width=True)


# 2. Room total per district (bars follow the row order of metrics_df; no extra sorting)
st.subheader("2 - Total Number of Rooms per District")
rooms_chart = px.bar(
    metrics_df,
    x="District",
    y="Total_Rooms",
    color="Total_Rooms",
    text="Total_Rooms",
    template="plotly_dark",
)
rooms_chart.update_layout(
    xaxis_title="District",
    yaxis_title="Number of Rooms",
    xaxis_tickangle=-45,
)
st.plotly_chart(rooms_chart, use_container_width=True)

# 3. Mean rooms per property in each district (bars follow the row order of metrics_df)
st.subheader("3 - Average Size of Accommodations (Rooms per Property)")
avg_chart = px.bar(
    metrics_df,
    x="District",
    y="Average_Rooms_Per_Property",
    color="Average_Rooms_Per_Property",
    text="Average_Rooms_Per_Property",
    template="plotly_dark",
)
avg_chart.update_layout(
    xaxis_title="District",
    yaxis_title="Average Rooms per Property",
    xaxis_tickangle=-45,
)
st.plotly_chart(avg_chart, use_container_width=True)




# Bar chart: how many filtered accommodations fall in each district
st.subheader("🏙️ Number of Accommodations per District")
district_count = (
    filtered_df["District"]
    .value_counts()
    .rename_axis("District")
    .reset_index(name="Count")
)
st.plotly_chart(
    px.bar(district_count, x="District", y="Count", color="District", text="Count", template="plotly_dark"),
    use_container_width=True,
)

# Donut chart: share of each accommodation type among the filtered rows
st.subheader("🏘️ Distribution by Accommodation Type")
type_count = (
    filtered_df["Type"]
    .value_counts()
    .rename_axis("Type")
    .reset_index(name="Count")
)
st.plotly_chart(
    px.pie(type_count, names="Type", values="Count", hole=0.4, template="plotly_dark"),
    use_container_width=True,
)

# Bar chart: summed room count per accommodation type, largest first
st.subheader("🛏️ Total Number of Rooms per Accommodation Type")
room_totals = filtered_df.groupby("Type")["Rooms"].sum()
rooms_by_type = room_totals.reset_index().sort_values(by="Rooms", ascending=False)
rooms_chart = px.bar(
    rooms_by_type,
    x="Type",
    y="Rooms",
    color="Rooms",
    text="Rooms",
    template="plotly_dark",
    labels={"Rooms": "Total Rooms", "Type": "Accommodation Type"},
)
# Explicit axis titles and slanted tick labels
rooms_chart.update_layout(xaxis_title="Accommodation Type", yaxis_title="Total Rooms", xaxis_tickangle=-45)
st.plotly_chart(rooms_chart, use_container_width=True)

# Map: Locations of accommodations
st.subheader("🗺️ Accommodation Locations Map")
# "Logitiute" is the longitude column as it is spelled in the dataset; st.map
# needs latitude/longitude column names, and it rejects null coordinates, so
# rows without a usable position are left off the map instead of crashing it.
map_points = (
    filtered_df[["Latitude", "Logitiute"]]
    .rename(columns={"Latitude": "latitude", "Logitiute": "longitude"})
    .dropna(subset=["latitude", "longitude"])
)
st.map(map_points)

# Data table: Show filtered data (all columns, including rows without coordinates)
st.subheader("📋 Filtered Accommodation Data")
st.dataframe(filtered_df, use_container_width=True)


## Version 4


In [None]:
# Write the app
%%writefile app.py
import streamlit as st

# Set page config IMMEDIATELY after importing streamlit
st.set_page_config(page_title="Accommodation Dashboard", layout="wide")

import pandas as pd
import plotly.express as px

# Load the processed accommodation dataset (result memoised by st.cache_data)
@st.cache_data
def load_data():
    """Read the processed accommodation CSV hosted in the project's GitHub repository.

    Returns:
        The DataFrame produced by ``pd.read_csv`` for the hosted CSV file.
    """
    # Point this at another URL or a local path to use a different copy of the data
    csv_url = "https://raw.githubusercontent.com/FrankPerera04/DSPL_Individual_CW/refs/heads/main/processed_accommodation_data%20(1).csv"
    return pd.read_csv(csv_url)


df = load_data()

# Page heading
st.title("🏢 Accommodation Insights Dashboard")

# Headline figures for the complete (unfiltered) dataset, one metric per column
card_columns = st.columns(4)
card_values = (
    ("Total Records", len(df)),
    ("Unique Districts", df["District"].nunique()),
    ("Accommodation Types", df["Type"].nunique()),
    ("Total Rooms", int(df["Rooms"].sum())),
)
for card, (label, value) in zip(card_columns, card_values):
    card.metric(label, value)

# Sidebar filters: every district and type is pre-selected, so nothing is hidden initially
district_options = df["District"].unique()
type_options = df["Type"].unique()

with st.sidebar:
    st.header("🔍 Filters")
    selected_districts = st.multiselect("Select District(s)", district_options, default=district_options)
    selected_types = st.multiselect("Select Accommodation Type(s)", type_options, default=type_options)

# Keep only the rows that match both sidebar selections
district_mask = df["District"].isin(selected_districts)
type_mask = df["Type"].isin(selected_types)
filtered_df = df[district_mask & type_mask]






# Per-district metrics shared by the three numbered charts
st.subheader("Accommodation Metrics by District")

# One row per district: property count, room total, and mean rooms per property (1 decimal)
metrics_df = (
    filtered_df.groupby("District")
    .agg(
        Total_Accommodations=("Name", "count"),
        Total_Rooms=("Rooms", "sum"),
        Average_Rooms_Per_Property=("Rooms", "mean"),
    )
    .reset_index()
)
metrics_df["Average_Rooms_Per_Property"] = metrics_df["Average_Rooms_Per_Property"].round(1)



# 1. Accommodation count per district, districts ordered from most to fewest
st.subheader("1 - Total Number of Accommodations per District")

metrics_df_sorted = metrics_df.sort_values(by="Total_Accommodations", ascending=False)

acc_chart = px.bar(
    metrics_df_sorted,
    x="District",
    y="Total_Accommodations",
    color="Total_Accommodations",
    text="Total_Accommodations",
    template="plotly_dark",
)
acc_chart.update_layout(xaxis_title="District", yaxis_title="Number of Accommodations", xaxis_tickangle=-45)
st.plotly_chart(acc_chart, use_container_width=True)



# 2. Room total per district, districts ordered from most to fewest rooms
st.subheader("2 - Total Number of Rooms per District")

metrics_df_rooms_sorted = metrics_df.sort_values(by="Total_Rooms", ascending=False)

rooms_chart = px.bar(
    metrics_df_rooms_sorted,
    x="District",
    y="Total_Rooms",
    color="Total_Rooms",
    text="Total_Rooms",
    template="plotly_dark",
)
rooms_chart.update_layout(
    xaxis_title="District",
    yaxis_title="Number of Rooms",
    xaxis_tickangle=-45,
)
st.plotly_chart(rooms_chart, use_container_width=True)



# 3. Mean rooms per property in each district, largest average first
st.subheader("3 - Average Size of Accommodations (Rooms per Property)")

metrics_df_avg_sorted = metrics_df.sort_values(by="Average_Rooms_Per_Property", ascending=False)

avg_chart = px.bar(
    metrics_df_avg_sorted,
    x="District",
    y="Average_Rooms_Per_Property",
    color="Average_Rooms_Per_Property",
    text="Average_Rooms_Per_Property",
    template="plotly_dark",
)
avg_chart.update_layout(
    xaxis_title="District",
    yaxis_title="Average Rooms per Property",
    xaxis_tickangle=-45,
)
st.plotly_chart(avg_chart, use_container_width=True)






# Geographic Gaps
# Map of accommodations + highlight of districts with few accommodations
st.subheader("🗺️ Geographic Gaps in Accommodation Availability")

# Number of (filtered) accommodations in each district
district_counts = filtered_df.groupby("District").agg(
    Total_Accommodations=("Name", "count")
).reset_index()

# Plot the accommodations on the map. Only the coordinate columns are needed, so
# they are taken straight from filtered_df (no merge with the counts required).
# "Logitiute" is the longitude column as spelled in the dataset; st.map rejects
# null coordinates, so rows without a usable position are left off the map.
map_points = (
    filtered_df[["Latitude", "Logitiute"]]
    .rename(columns={"Latitude": "latitude", "Logitiute": "longitude"})
    .dropna(subset=["latitude", "longitude"])
)
st.map(map_points)

# Highlight districts with very few accommodations
low_accommodation_threshold = 5  # You can adjust this threshold
low_acc_districts = district_counts[district_counts["Total_Accommodations"] <= low_accommodation_threshold]

if not low_acc_districts.empty:
    # The message is built from the threshold so the two can never disagree
    st.warning(f"⚠️ Districts with very few accommodations (<= {low_accommodation_threshold}):")
    st.dataframe(low_acc_districts, use_container_width=True)
else:
    st.success("✅ All districts have sufficient accommodation coverage.")







# Accommodation type distribution: donut chart plus the underlying counts
st.subheader("🏘️ Accommodation Type Distribution")

# Rows per accommodation type among the filtered data
type_distribution = (
    filtered_df["Type"]
    .value_counts()
    .rename_axis("Accommodation Type")
    .reset_index(name="Count")
)

type_chart = px.pie(
    type_distribution,
    names="Accommodation Type",
    values="Count",
    title="Accommodation Types Share",
    hole=0.4,
    template="plotly_dark",
)
st.plotly_chart(type_chart, use_container_width=True)

# The same counts as a table
st.dataframe(type_distribution, use_container_width=True)











# Action signals: flag districts whose accommodation count is at or below a threshold
st.subheader("🚨 Action Signals: Districts Needing Accommodation Improvement")

low_threshold = 5  # districts with this many accommodations or fewer are flagged

# Property count and room total for every district in the filtered data
action_df = (
    filtered_df.groupby("District")
    .agg(Total_Accommodations=("Name", "count"), Total_Rooms=("Rooms", "sum"))
    .reset_index()
)

is_low = action_df["Total_Accommodations"] <= low_threshold
low_accommodation_districts = action_df[is_low]

if low_accommodation_districts.empty:
    st.success("✅ All districts have sufficient number of accommodations!")
else:
    st.warning(f"⚠️ {len(low_accommodation_districts)} District(s) have <= {low_threshold} accommodations. Consider investing here!")
    st.dataframe(low_accommodation_districts, use_container_width=True)

    # Bar chart restricted to the flagged districts
    low_chart = px.bar(
        low_accommodation_districts,
        x="District",
        y="Total_Accommodations",
        color="Total_Accommodations",
        text="Total_Accommodations",
        title="Districts with Low Number of Accommodations",
        template="plotly_dark",
    )
    low_chart.update_layout(
        xaxis_title="District",
        yaxis_title="Total Accommodations",
        xaxis_tickangle=-45,
    )
    st.plotly_chart(low_chart, use_container_width=True)


# Full table of the rows that passed the sidebar filters
st.subheader("📋 Filtered Accommodation Data")
st.dataframe(filtered_df, use_container_width=True)



## Version 5


In [None]:
# Write the app
%%writefile app.py
# Import necessary libraries
import streamlit as st
import pandas as pd
import plotly.express as px

# Set page configuration
st.set_page_config(page_title="Accommodation Dashboard", layout="wide")

# Load data
@st.cache_data
def load_data():
    """Read the processed accommodation CSV hosted in the project's GitHub repository.

    The result is cached with ``st.cache_data`` so the file is not downloaded
    again on every script rerun (Streamlit reruns the script on each widget
    interaction).

    Returns:
        The DataFrame produced by ``pd.read_csv`` for the hosted CSV file.
    """
    file_path = "https://raw.githubusercontent.com/FrankPerera04/DSPL_Individual_CW/refs/heads/main/processed_accommodation_data%20(1).csv"
    df = pd.read_csv(file_path)
    return df

df = load_data()

# Sidebar navigation: a radio button picks which page the script renders
st.sidebar.header("🏠 Navigation")
page_names = (
    "About",
    "Accommodation Capacity by Region",
    "Geographic Gaps",
    "Type Distribution",
    "Action Signals",
)
page = st.sidebar.radio("Go to", page_names)

# Sidebar filter widgets
st.sidebar.header("🔎 Filter Options")

# District choices: sorted distinct values (missing ones dropped), preceded by an "All" entry
all_districts = sorted(df["District"].dropna().unique().tolist())
selected_districts = st.sidebar.multiselect(
    "Select District(s)",
    options=["All", *all_districts],
    default=["All"],
)

# Accommodation type choices, built the same way
all_types = sorted(df["Type"].dropna().unique().tolist())
selected_types = st.sidebar.multiselect(
    "Select Accommodation Type(s)",
    options=["All", *all_types],
    default=["All"],
)

# Start from a copy and narrow it per filter; a selection that contains "All"
# leaves the corresponding column unrestricted
filtered_df = df.copy()
for column, chosen in (("District", selected_districts), ("Type", selected_types)):
    if "All" not in chosen:
        filtered_df = filtered_df[filtered_df[column].isin(chosen)]

# Main Page Routing: render only the page selected in the sidebar radio
if page == "About":
    st.title("🏨 Accommodation Dashboard")
    st.write("Welcome to the Accommodation Insights Dashboard! Navigate using the sidebar.")

elif page == "Accommodation Capacity by Region":
    st.title("🏙️ Accommodation Capacity by Region")

    # Total number of accommodations per district.
    # The count column gets its own name so the chart's hover text and colour bar
    # read "Total Accommodations" instead of the raw "Name" column it was counted from.
    st.subheader("Total Number of Accommodations per District")
    accommodations_per_district = (
        filtered_df.groupby("District")["Name"]
        .count()
        .reset_index(name="Total_Accommodations")
        .sort_values(by="Total_Accommodations", ascending=False)
    )
    fig1 = px.bar(
        accommodations_per_district,
        x="District",
        y="Total_Accommodations",
        color="Total_Accommodations",
        text="Total_Accommodations",
        template="plotly_dark",
        labels={"Total_Accommodations": "Total Accommodations"},
    )
    fig1.update_layout(xaxis_title="District", yaxis_title="Total Accommodations", xaxis_tickangle=-45)
    st.plotly_chart(fig1, use_container_width=True)

    # Total number of rooms per district, largest first
    st.subheader("Total Number of Rooms per District")
    rooms_per_district = filtered_df.groupby("District")["Rooms"].sum().reset_index().sort_values(by="Rooms", ascending=False)
    fig2 = px.bar(rooms_per_district, x="District", y="Rooms", color="Rooms", text="Rooms", template="plotly_dark")
    fig2.update_layout(xaxis_title="District", yaxis_title="Total Rooms", xaxis_tickangle=-45)
    st.plotly_chart(fig2, use_container_width=True)

    # Average size of accommodations; bar labels are rounded to one decimal
    st.subheader("Average Size of Accommodations (Rooms per Property)")
    avg_rooms_per_property = (filtered_df.groupby("District")["Rooms"].mean()).reset_index().sort_values(by="Rooms", ascending=False)
    fig3 = px.bar(avg_rooms_per_property, x="District", y="Rooms", color="Rooms", text=avg_rooms_per_property["Rooms"].round(1), template="plotly_dark")
    fig3.update_layout(xaxis_title="District", yaxis_title="Average Rooms per Property", xaxis_tickangle=-45)
    st.plotly_chart(fig3, use_container_width=True)

elif page == "Geographic Gaps":
    st.title("🗺️ Geographic Gaps")

    # Number of (filtered) accommodations in each district
    district_counts = filtered_df.groupby("District").agg(Total_Accommodations=("Name", "count")).reset_index()

    # Only the coordinate columns are plotted, so they are taken straight from
    # filtered_df (no merge with the counts required). "Logitiute" is the longitude
    # column as spelled in the dataset; st.map rejects null coordinates, so rows
    # without a usable position are left off the map.
    map_points = (
        filtered_df[["Latitude", "Logitiute"]]
        .rename(columns={"Latitude": "latitude", "Logitiute": "longitude"})
        .dropna(subset=["latitude", "longitude"])
    )
    st.map(map_points)

    low_accommodation_threshold = 5
    low_acc_districts = district_counts[district_counts["Total_Accommodations"] <= low_accommodation_threshold]

    if not low_acc_districts.empty:
        # The message is built from the threshold so the two can never disagree
        st.warning(f"⚠️ Districts with very few accommodations (<= {low_accommodation_threshold}):")
        st.dataframe(low_acc_districts, use_container_width=True)
    else:
        st.success("✅ All districts have sufficient accommodation coverage.")

elif page == "Type Distribution":
    st.title("🏘️ Accommodation Type Distribution")

    # Rows per accommodation type among the filtered data
    type_distribution = filtered_df["Type"].value_counts().reset_index()
    type_distribution.columns = ["Accommodation Type", "Count"]

    fig_type = px.pie(
        type_distribution,
        names="Accommodation Type",
        values="Count",
        title="Accommodation Types Share",
        hole=0.4,
        template="plotly_dark"
    )
    st.plotly_chart(fig_type, use_container_width=True)

    st.dataframe(type_distribution, use_container_width=True)

elif page == "Action Signals":
    st.title("🚨 Action Signals: Target for Improvements")

    # Districts with this many accommodations or fewer are flagged
    low_threshold = 5
    action_df = filtered_df.groupby("District").agg(
        Total_Accommodations=("Name", "count"),
        Total_Rooms=("Rooms", "sum")
    ).reset_index()

    low_accommodation_districts = action_df[action_df["Total_Accommodations"] <= low_threshold]

    if not low_accommodation_districts.empty:
        st.warning(f"⚠️ {len(low_accommodation_districts)} District(s) have <= {low_threshold} accommodations. Consider investing here!")
        st.dataframe(low_accommodation_districts, use_container_width=True)

        # Bar chart restricted to the flagged districts
        fig_low = px.bar(
            low_accommodation_districts,
            x="District",
            y="Total_Accommodations",
            color="Total_Accommodations",
            text="Total_Accommodations",
            title="Districts with Low Number of Accommodations",
            template="plotly_dark"
        )
        fig_low.update_layout(xaxis_title="District", yaxis_title="Total Accommodations", xaxis_tickangle=-45)
        st.plotly_chart(fig_low, use_container_width=True)
    else:
        st.success("✅ All districts have sufficient number of accommodations!")


## Version 6

In [None]:
# Write the app
%%writefile app.py
# Import necessary libraries
import streamlit as st
import pandas as pd
import plotly.express as px

# Set page configuration
st.set_page_config(page_title="Accommodation Dashboard", layout="wide")

# Load data
@st.cache_data
def load_data():
    """Read the processed accommodation CSV hosted in the project's GitHub repository.

    The result is cached with ``st.cache_data`` so the file is not downloaded
    again on every script rerun (Streamlit reruns the script on each widget
    interaction).

    Returns:
        The DataFrame produced by ``pd.read_csv`` for the hosted CSV file.
    """
    file_path = "https://raw.githubusercontent.com/FrankPerera04/DSPL_Individual_CW/refs/heads/main/processed_accommodation_data%20(1).csv"
    df = pd.read_csv(file_path)
    return df

df = load_data()

# Sidebar navigation: a radio button picks which page the script renders
st.sidebar.header("🏠 Navigation")
page_names = (
    "About",
    "Accommodation Capacity by Region",
    "Geographic Gaps",
    "Type Distribution",
    "Action Signals",
)
page = st.sidebar.radio("Go to", page_names)

# Sidebar filter widgets
st.sidebar.header("🔎 Filter Options")

# District choices: sorted distinct values (missing ones dropped), preceded by an "All" entry
all_districts = sorted(df["District"].dropna().unique().tolist())
selected_districts = st.sidebar.multiselect(
    "Select District(s)",
    options=["All", *all_districts],
    default=["All"],
)

# Accommodation type choices, built the same way
all_types = sorted(df["Type"].dropna().unique().tolist())
selected_types = st.sidebar.multiselect(
    "Select Accommodation Type(s)",
    options=["All", *all_types],
    default=["All"],
)

# Start from a copy and narrow it per filter; a selection that contains "All"
# leaves the corresponding column unrestricted
filtered_df = df.copy()
for column, chosen in (("District", selected_districts), ("Type", selected_types)):
    if "All" not in chosen:
        filtered_df = filtered_df[filtered_df[column].isin(chosen)]

# Main Page Routing: render only the page selected in the sidebar radio
if page == "About":
    # Static description of the dashboard, written in Markdown
    st.markdown("""
    # 📄 About This Dashboard
    ---
    ### Purpose
    This dashboard provides key insights into accommodation trends across **Sri Lanka**.
    It is designed to help **government officials** make informed decisions regarding tourism development, infrastructure investment, and policy planning.

    ### Data Sources
    - Department of Census and Statistics
    - Sri Lanka Tourism Development Authority
    - Other verified public and private datasets

    ### Key Features
    -  Accommodation Capacity
    -  Room availability across the island
    -  Break down of the most common accommodation types
    -  Gap and weak points analysis
    -  Provincial and city-level comparisons

    ### Intended Users
    - Government officials
    - Policymakers
    - Tourism and regional development stakeholders

    ### Disclaimer
    The information provided in this dashboard is for informational purposes only.
    While every effort has been made to ensure the accuracy of the data, users are advised to independently verify critical information before making decisions.
    ---
    """)

elif page == "Accommodation Capacity by Region":
    st.title("🏙️ Accommodation Capacity by Region")

    # Total number of accommodations per district.
    # The count column gets its own name so the chart's hover text and colour bar
    # read "Total Accommodations" instead of the raw "Name" column it was counted from.
    st.subheader("Total Number of Accommodations per District")
    accommodations_per_district = (
        filtered_df.groupby("District")["Name"]
        .count()
        .reset_index(name="Total_Accommodations")
        .sort_values(by="Total_Accommodations", ascending=False)
    )
    fig1 = px.bar(
        accommodations_per_district,
        x="District",
        y="Total_Accommodations",
        color="Total_Accommodations",
        text="Total_Accommodations",
        template="plotly_dark",
        labels={"Total_Accommodations": "Total Accommodations"},
    )
    fig1.update_layout(xaxis_title="District", yaxis_title="Total Accommodations", xaxis_tickangle=-45)
    st.plotly_chart(fig1, use_container_width=True)

    # Total number of rooms per district, largest first
    st.subheader("Total Number of Rooms per District")
    rooms_per_district = filtered_df.groupby("District")["Rooms"].sum().reset_index().sort_values(by="Rooms", ascending=False)
    fig2 = px.bar(rooms_per_district, x="District", y="Rooms", color="Rooms", text="Rooms", template="plotly_dark")
    fig2.update_layout(xaxis_title="District", yaxis_title="Total Rooms", xaxis_tickangle=-45)
    st.plotly_chart(fig2, use_container_width=True)

    # Average size of accommodations; bar labels are rounded to one decimal
    st.subheader("Average Size of Accommodations (Rooms per Property)")
    avg_rooms_per_property = (filtered_df.groupby("District")["Rooms"].mean()).reset_index().sort_values(by="Rooms", ascending=False)
    fig3 = px.bar(avg_rooms_per_property, x="District", y="Rooms", color="Rooms", text=avg_rooms_per_property["Rooms"].round(1), template="plotly_dark")
    fig3.update_layout(xaxis_title="District", yaxis_title="Average Rooms per Property", xaxis_tickangle=-45)
    st.plotly_chart(fig3, use_container_width=True)

elif page == "Geographic Gaps":
    st.title("🗺️ Geographic Gaps")

    # Number of (filtered) accommodations in each district
    district_counts = filtered_df.groupby("District").agg(Total_Accommodations=("Name", "count")).reset_index()

    # Only the coordinate columns are plotted, so they are taken straight from
    # filtered_df (no merge with the counts required). "Logitiute" is the longitude
    # column as spelled in the dataset; st.map rejects null coordinates, so rows
    # without a usable position are left off the map.
    map_points = (
        filtered_df[["Latitude", "Logitiute"]]
        .rename(columns={"Latitude": "latitude", "Logitiute": "longitude"})
        .dropna(subset=["latitude", "longitude"])
    )
    st.map(map_points)

    low_accommodation_threshold = 5
    low_acc_districts = district_counts[district_counts["Total_Accommodations"] <= low_accommodation_threshold]

    if not low_acc_districts.empty:
        # The message is built from the threshold so the two can never disagree
        st.warning(f"⚠️ Districts with very few accommodations (<= {low_accommodation_threshold}):")
        st.dataframe(low_acc_districts, use_container_width=True)
    else:
        st.success("✅ All districts have sufficient accommodation coverage.")

elif page == "Type Distribution":
    st.title("🏘️ Accommodation Type Distribution")

    # Rows per accommodation type among the filtered data
    type_distribution = filtered_df["Type"].value_counts().reset_index()
    type_distribution.columns = ["Accommodation Type", "Count"]

    fig_type = px.pie(
        type_distribution,
        names="Accommodation Type",
        values="Count",
        title="Accommodation Types Share",
        hole=0.4,
        template="plotly_dark"
    )
    st.plotly_chart(fig_type, use_container_width=True)

    st.dataframe(type_distribution, use_container_width=True)

elif page == "Action Signals":
    st.title("🚨 Action Signals: Target for Improvements")

    # Districts with this many accommodations or fewer are flagged
    low_threshold = 5
    action_df = filtered_df.groupby("District").agg(
        Total_Accommodations=("Name", "count"),
        Total_Rooms=("Rooms", "sum")
    ).reset_index()

    low_accommodation_districts = action_df[action_df["Total_Accommodations"] <= low_threshold]

    if not low_accommodation_districts.empty:
        st.warning(f"⚠️ {len(low_accommodation_districts)} District(s) have <= {low_threshold} accommodations. Consider investing here!")
        st.dataframe(low_accommodation_districts, use_container_width=True)

        # Bar chart restricted to the flagged districts
        fig_low = px.bar(
            low_accommodation_districts,
            x="District",
            y="Total_Accommodations",
            color="Total_Accommodations",
            text="Total_Accommodations",
            title="Districts with Low Number of Accommodations",
            template="plotly_dark"
        )
        fig_low.update_layout(xaxis_title="District", yaxis_title="Total Accommodations", xaxis_tickangle=-45)
        st.plotly_chart(fig_low, use_container_width=True)
    else:
        st.success("✅ All districts have sufficient number of accommodations!")


## Version 7

In [None]:
# Write the app
%%writefile app.py
# Import necessary libraries
import streamlit as st
import pandas as pd
import plotly.express as px

# Page configuration: browser-tab title and full-width layout.
# NOTE(review): kept ahead of every other st.* call; Streamlit has historically
# required set_page_config to be the first Streamlit command - confirm for the
# version in use before moving it.
st.set_page_config(
    page_title="Accommodation Dashboard",
    layout="wide",
)

# Load data
@st.cache_data
def load_data():
    """Read the processed accommodation CSV from GitHub into a DataFrame.

    Streamlit re-executes this script from top to bottom on every widget
    interaction, so without caching the file would be downloaded and parsed
    again on each rerun. ``st.cache_data`` keeps the parsed result and hands
    back a copy on later calls.

    Returns:
        pandas.DataFrame: the contents of the CSV at ``file_path``.
    """
    file_path = "https://raw.githubusercontent.com/FrankPerera04/DSPL_Individual_CW/refs/heads/main/processed_accommodation_data%20(1).csv"
    return pd.read_csv(file_path)


df = load_data()

# Sidebar Navigation
# The selected label is stored in `page` and compared against these exact
# strings by the routing chain further down.
page_names = (
    "About",
    "Accommodation Capacity by Region",
    "Geographic Gaps",
    "Type Distribution",
    "Action Signals",
)
st.sidebar.header("🏠 Navigation")
page = st.sidebar.radio("Go to", page_names)

# Sidebar Filters
st.sidebar.header("🔎 Filter Options")

# District filter (multi-select); "All" is a sentinel option that disables the filter
all_districts = sorted(df["District"].dropna().unique().tolist())
selected_districts = st.sidebar.multiselect(
    "Select District(s)",
    options=["All", *all_districts],
    default=["All"],
)

# Accommodation Type filter (multi-select), same "All" convention
all_types = sorted(df["Type"].dropna().unique().tolist())
selected_types = st.sidebar.multiselect(
    "Select Accommodation Type(s)",
    options=["All", *all_types],
    default=["All"],
)

# Apply filters: a column is narrowed only when "All" is absent from its selection
filtered_df = df.copy()
for column, chosen in (("District", selected_districts), ("Type", selected_types)):
    if "All" in chosen:
        continue
    filtered_df = filtered_df[filtered_df[column].isin(chosen)]

# Main Page Routing
# Renders exactly one page per script run, chosen by the sidebar radio (`page`).
# All data pages work on `filtered_df`, i.e. the dataset after sidebar filters.

# Districts with at most this many accommodations are flagged as under-served.
# Shared by "Geographic Gaps" and "Action Signals" so both pages always agree.
LOW_ACCOMMODATION_THRESHOLD = 5

if page == "About":
    bg_image_url = "https://github.com/FrankPerera04/DSPL_Individual_CW/raw/main/Images/About.jpeg"

    # Add background and dark overlay
    st.markdown(
         f"""
         <style>
         .stApp {{
             background: linear-gradient(
                 rgba(0, 0, 0, 0.7),
                 rgba(0, 0, 0, 0.7)
             ), url("{bg_image_url}");
             background-size: cover;
             background-position: center;
             background-repeat: no-repeat;
             background-attachment: fixed;
             color: white;
         }}
         .block-container {{
             background-color: rgba(0, 0, 0, 0);
         }}
         </style>
         """,
         unsafe_allow_html=True
    )

    # About Page Content
    st.markdown("""
    # Accommodation Insights
    ---
    ### Purpose
    This dashboard provides key insights into accommodation trends across **Sri Lanka**.
    It is designed to help **government officials** make informed decisions regarding tourism development, infrastructure investment, and policy planning.

    ### Data Sources
    - Department of Census and Statistics
    - Sri Lanka Tourism Development Authority
    - Other verified public and private datasets

    ### Key Features
    - Accommodation Capacity
    - Room availability across the island
    - Breakdown of the most common accommodation types
    - Gap and weak points analysis
    - Provincial and city-level comparisons

    ### Intended Users
    - Government officials
    - Policymakers
    - Tourism and regional development stakeholders

    ### Disclaimer
    The information provided in this dashboard is for informational purposes only.
    While every effort has been made to ensure the accuracy of the data, users are advised to independently verify critical information before making decisions.

    ---
    """)


elif filtered_df.empty:
    # With no rows left, the pages below would draw blank charts and the
    # "low accommodation" checks would find nothing to flag and wrongly report
    # that every district is sufficiently covered. Say so explicitly instead.
    st.info("No accommodations match the selected filters. Adjust the filters in the sidebar.")

elif page == "Accommodation Capacity by Region":
    st.title("🏙️ Accommodation Capacity by Region")

    # Total number of accommodations per district
    # (the count lands in the "Name" column, hence y="Name" below)
    st.subheader("Total Number of Accommodations per District")
    accommodations_per_district = filtered_df.groupby("District")["Name"].count().reset_index().sort_values(by="Name", ascending=False)
    fig1 = px.bar(accommodations_per_district, x="District", y="Name", color="Name", text="Name", template="plotly_dark")
    fig1.update_layout(xaxis_title="District", yaxis_title="Total Accommodations", xaxis_tickangle=-45)
    st.plotly_chart(fig1, use_container_width=True)

    # Total number of rooms per district
    st.subheader("Total Number of Rooms per District")
    rooms_per_district = filtered_df.groupby("District")["Rooms"].sum().reset_index().sort_values(by="Rooms", ascending=False)
    fig2 = px.bar(rooms_per_district, x="District", y="Rooms", color="Rooms", text="Rooms", template="plotly_dark")
    fig2.update_layout(xaxis_title="District", yaxis_title="Total Rooms", xaxis_tickangle=-45)
    st.plotly_chart(fig2, use_container_width=True)

    # Average size of accommodations (bar labels rounded to one decimal)
    st.subheader("Average Size of Accommodations (Rooms per Property)")
    avg_rooms_per_property = (filtered_df.groupby("District")["Rooms"].mean()).reset_index().sort_values(by="Rooms", ascending=False)
    fig3 = px.bar(avg_rooms_per_property, x="District", y="Rooms", color="Rooms", text=avg_rooms_per_property["Rooms"].round(1), template="plotly_dark")
    fig3.update_layout(xaxis_title="District", yaxis_title="Average Rooms per Property", xaxis_tickangle=-45)
    st.plotly_chart(fig3, use_container_width=True)

elif page == "Geographic Gaps":
    st.title("🗺️ Geographic Gaps")

    district_counts = filtered_df.groupby("District").agg(Total_Accommodations=("Name", "count")).reset_index()

    # The map only needs the coordinate columns, so they are taken straight from
    # filtered_df. Longitude is read from a column named "Logitiute"; that
    # spelling must keep matching the dataset header. Rows without coordinates
    # are dropped because they cannot be plotted.
    map_points = (
        filtered_df[["Latitude", "Logitiute"]]
        .rename(columns={"Logitiute": "longitude", "Latitude": "latitude"})
        .dropna()
    )
    st.map(map_points)

    low_acc_districts = district_counts[district_counts["Total_Accommodations"] <= LOW_ACCOMMODATION_THRESHOLD]

    if not low_acc_districts.empty:
        st.warning(f"⚠️ Districts with very few accommodations (<= {LOW_ACCOMMODATION_THRESHOLD}):")
        st.dataframe(low_acc_districts, use_container_width=True)
    else:
        st.success("✅ All districts have sufficient accommodation coverage.")

elif page == "Type Distribution":
    st.title("🏘️ Accommodation Type Distribution")

    # Two-column frame (type, number of rows); columns are renamed for display
    type_distribution = filtered_df["Type"].value_counts().reset_index()
    type_distribution.columns = ["Accommodation Type", "Count"]

    fig_type = px.pie(
        type_distribution,
        names="Accommodation Type",
        values="Count",
        title="Accommodation Types Share",
        hole=0.4,
        template="plotly_dark"
    )
    st.plotly_chart(fig_type, use_container_width=True)

    st.dataframe(type_distribution, use_container_width=True)

elif page == "Action Signals":
    st.title("🚨 Action Signals: Target for Improvements")

    # Per-district totals: number of properties and summed room count
    action_df = filtered_df.groupby("District").agg(
        Total_Accommodations=("Name", "count"),
        Total_Rooms=("Rooms", "sum")
    ).reset_index()

    low_accommodation_districts = action_df[action_df["Total_Accommodations"] <= LOW_ACCOMMODATION_THRESHOLD]

    if not low_accommodation_districts.empty:
        st.warning(f"⚠️ {len(low_accommodation_districts)} District(s) have <= {LOW_ACCOMMODATION_THRESHOLD} accommodations. Consider investing here!")
        st.dataframe(low_accommodation_districts, use_container_width=True)

        fig_low = px.bar(
            low_accommodation_districts,
            x="District",
            y="Total_Accommodations",
            color="Total_Accommodations",
            text="Total_Accommodations",
            title="Districts with Low Number of Accommodations",
            template="plotly_dark"
        )
        fig_low.update_layout(xaxis_title="District", yaxis_title="Total Accommodations", xaxis_tickangle=-45)
        st.plotly_chart(fig_low, use_container_width=True)
    else:
        st.success("✅ All districts have sufficient number of accommodations!")
