In [17]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go

# ---------------------------------------
# 1) Load data
# ---------------------------------------
CSV_PATH = "dataverse_files/201501-wifi-raw.csv"  # Update this path as needed, in this case: January 2015 data

# "Date" is optional downstream, so peek at the header (no data rows are read)
# and only request date parsing when the column is really there; asking
# read_csv to parse a missing column raises. The deprecated
# `infer_datetime_format` flag is dropped: pandas reports that a strict
# version of that behaviour is now the default.
_csv_columns = pd.read_csv(CSV_PATH, nrows=0).columns
df = pd.read_csv(
    CSV_PATH,
    parse_dates=["Date"] if "Date" in _csv_columns else False,
)

# Basic sanity: the rest of the script cannot work without these two columns.
if "Floor" not in df.columns:
    raise ValueError("CSV must have a 'Floor' column.")
if "Traffic-Both-Byte" not in df.columns:
    raise ValueError("CSV must have 'Traffic-Both-Byte' column.")

# Clean traffic column: anything non-numeric becomes NaN and is then counted as 0.
df["Traffic-Both-Byte"] = pd.to_numeric(df["Traffic-Both-Byte"], errors="coerce").fillna(0)

# ---------------------------------------
# 2) Building geometry & floors
# ---------------------------------------
# Wing footprints as (x, y) corners, starting at the bottom-left corner and
# going round counter-clockwise (index 2 is therefore the top-right corner).
old_wing = [
    (0, 0),
    (62, 0),
    (62, 40),
    (0, 40),
]
new_wing = [
    (66, 6),
    (118, 6),
    (118, 44),
    (66, 44),
]

# For drawing labels only (not for mapping): display name paired with the
# elevation at which that floor's outline is drawn.
_floor_names = ("LG4", "LG3", "LG1", "G/F", "1/F")
_floor_elevations = (-12.0, -8.0, -4.0, 0.0, 4.0)
floors_for_drawing = list(zip(_floor_names, _floor_elevations))

# Elevation for each floor label that occurs in the data
# (['1', 'G', 'LG1', 'LG3', 'LG4']); keys are stored upper-case.
_FLOOR_ELEVATIONS = {
    "LG4": -12.0,
    "LG3": -8.0,
    "LG1": -4.0,
    "G": 0.0,
    "1": 4.0,
}


def floor_to_z(label):
    """Return the elevation for a floor label, or None if it is not recognised.

    The label is matched case-insensitively after stripping surrounding
    whitespace. Non-string input and unknown labels both yield None.
    """
    if isinstance(label, str):
        return _FLOOR_ELEVATIONS.get(label.strip().upper())
    return None

# Map every row's floor label to an elevation first, without filtering, so the
# coverage report below reflects the whole file.
floor_labels = df["Floor"].astype(str)
z_series = floor_labels.map(floor_to_z)
unmapped_mask = z_series.isna()
mapped_count = (~unmapped_mask).sum()
print("Mapped Before Filtering:", mapped_count, "of", len(df))
if mapped_count != len(df):
    # Show which labels failed to map, most frequent first.
    unmapped_counts = df.loc[unmapped_mask, "Floor"].value_counts()
    print("\nUnmapped Floors (top 20):")
    print(unmapped_counts.head(20))

# Attach the elevation and keep only rows that map to a floor
# (e.g. All-Floors rows are dropped).
df = df.assign(_z_floor=z_series)
has_floor = df["_z_floor"].notna()
df = df.loc[has_floor].copy()

# ---------------------------------------
# 3) Prepare per-row plotting coordinates
# ---------------------------------------
np.random.seed(42)  # fixed seed so the scattered points are the same on every run


def random_point_in_rect(rect):
    """Draw one uniformly random (x, y) point inside a rectangle.

    `rect` is a sequence of corner points; only index 0 and index 2 (two
    opposite corners) are read. The corners may be given in either order.
    x is drawn before y from NumPy's global random state.
    """
    (ax, ay), (bx, by) = rect[0], rect[2]
    x_lo, x_hi = sorted((ax, bx))
    y_lo, y_hi = sorted((ay, by))
    px = np.random.uniform(x_lo, x_hi)
    py = np.random.uniform(y_lo, y_hi)
    return px, py

# Place every row in one of the two wings with equal probability
# (True -> old_wing, False -> new_wing), then sample an (x, y) inside that wing.
# The order of the random draws (wing flags, then per-row points, then the
# vertical offsets) is kept fixed so the seeded output is reproducible.
wing_choice = np.random.rand(len(df)) < 0.5
sampled_points = [
    random_point_in_rect(old_wing if use_old else new_wing)
    for use_old in wing_choice
]
xs = [point[0] for point in sampled_points]
ys = [point[1] for point in sampled_points]

df["_x"] = xs
df["_y"] = ys
# Lift each marker a random 0.5-1.8 above its floor's elevation so the dots
# float over the outline instead of sitting on it.
vertical_offset = np.random.uniform(0.5, 1.8, len(df))
df["_z"] = df["_z_floor"] + vertical_offset

# ---------------------------------------
# 4) Visual encodings (color, size, hover)
# ---------------------------------------
# Users column (optional): use the first known column name that is present,
# otherwise fall back to a synthetic "Users" column with one user per row.
possible_user_cols = [
    "Users", "Unique-Users", "Unique Users", "Client-Count",
    "Num-Users", "Total-Users", "Associated-Users", "Associations", "Clients"
]
user_col = next((c for c in possible_user_cols if c in df.columns), None)
if user_col is None:
    df["Users"] = 1
    user_col = "Users"

# Clean the user counts the same way the traffic column is cleaned: blanks and
# non-numeric values become 0 instead of making the int64 cast below raise.
user_counts = pd.to_numeric(df[user_col], errors="coerce").fillna(0).astype(np.int64)

# Normalize traffic to [0, 1] for color/size. The divisor falls back to 1.0
# when there are no rows at all or when every row has zero traffic, so the
# division is always defined.
t = df["Traffic-Both-Byte"].astype(float).values
tmax = t.max() if t.size and t.max() > 0 else 1.0
tnorm = t / tmax
msize = (tnorm * 18.0 + 2.0)  # marker size grows linearly from 2 to 20

# Prepare customdata for hover: one row per marker with the columns
# [floor, date, traffic, users], in the order the hover template indexes them.
custom = np.column_stack([
    df["Floor"].astype(str).values,
    df.get("Date", pd.Series([""] * len(df))).astype(str).values,
    df["Traffic-Both-Byte"].astype(np.int64).values,
    user_counts.values,
])

# ---------------------------------------
# 5) Build figure
# ---------------------------------------
fig = go.Figure()

# Draw stacked floor outlines, but only for floors that actually occur in df.
present_zs = set(df["_z_floor"].unique().tolist())
# Two wings, as shown in the HKU Library map: (label, footprint, line color).
wing_styles = (
    ("Old", old_wing, "#1f77b4"),
    ("New", new_wing, "#ff7f0e"),
)
for name, z in floors_for_drawing:
    if z in present_zs:
        for idx, (wing_label, poly, color) in enumerate(wing_styles):
            # Repeat the first corner at the end so the outline closes.
            closed = poly + [poly[0]]
            x_loop = [corner[0] for corner in closed]
            y_loop = [corner[1] for corner in closed]
            outline = go.Scatter3d(
                x=x_loop,
                y=y_loop,
                z=[z] * len(x_loop),
                mode="lines",
                line=dict(width=2, color=color),
                name=f"{name} {wing_label} Wing",
                showlegend=(idx == 0),  # one legend row per floor
            )
            fig.add_trace(outline)

# Add one marker per CSV row. Color and size both encode the normalized
# traffic; the hover box reads its fields from `custom` by column index.
fig.add_trace(go.Scatter3d(
    x=df["_x"], y=df["_y"], z=df["_z"],
    mode="markers",
    customdata=custom,
    marker=dict(
        size=msize,
        color=tnorm,
        colorscale="inferno",
        cmin=0, cmax=1,
        opacity=0.75,
        symbol="circle",
        showscale=True,
        colorbar=dict(
            # The title is given in its nested form (text + side). The flat
            # `titleside` shortcut is deprecated and rejected by newer plotly
            # releases, while the nested form is accepted by old and new alike.
            title=dict(
                text="Wi-Fi Traffic<br>(0 = Low, 1 = High)",
                side="right",
            ),
            tickvals=[0, 0.25, 0.5, 0.75, 1],
            ticktext=["Low", "", "Medium", "", "High"],
            len=0.75,
            x=1.05
        )
    ),
    # Creates detailed hover info; <extra></extra> hides the secondary box
    # that would otherwise repeat the trace name.
    hovertemplate=(
        "Floor: %{customdata[0]}<br>"
        "Date: %{customdata[1]}<br>"
        "Traffic: %{customdata[2]:,} bytes<br>"
        "Users: %{customdata[3]:,}<br>"
        "x: %{x:.1f}, y: %{y:.1f}, z: %{z:.1f}"
        "<extra></extra>"
    ),
    name="Users/Sessions (1 dot per row)"
))

# Title, axes, legend placement and a caption explaining the marker encodings.
fig.update_layout(
    title="HKU Library — 3D Floors (One Dot per Row)",
    scene=dict(
        xaxis_title="X (m)", yaxis_title="Y (m)", zaxis_title="Elevation (m)",
        aspectmode="data",  # keep the axes in proportion to the data ranges
        xaxis=dict(showgrid=True),
        yaxis=dict(showgrid=True),
        zaxis=dict(showgrid=True),
    ),
    # Adjust legend and margins
    legend=dict(orientation="h", y=1.08, x=0),
    margin=dict(l=0, r=0, t=50, b=0),
    annotations=[
        dict(
            # Plotly text breaks lines on <br>; a "\n" is not rendered as a
            # line break, which left the two-line caption on a single line.
            text="Marker color → traffic intensity (normalized)<br>"
                 "Marker size → traffic volume",
            showarrow=False,
            xref="paper", yref="paper",
            x=0.01, y=0.95,
            align="left",
            bgcolor="rgba(255,255,255,0.7)",
            bordercolor="black"
        )
    ]
)

fig.show()

Mapped Before Filtering: 1240 of 1488

Unmapped Floors (top 20):
Floor
All-Floors    248
Name: count, dtype: int64



The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.

