###  Social Life Index

The **Social Life Index** reflects how actively people gather and interact across Warsaw’s districts.
It’s derived from anonymized network data by counting how many unique users are present in the same cell within the same short time interval (15 minutes by default).
Districts with higher simultaneous user activity receive higher scores, indicating stronger social dynamics and more vibrant local life.
This index provides an intuitive, data-driven view of community presence across the city.

In [35]:
import pandas as pd
import numpy as np

In [52]:
# Load the raw event table from the project-relative CSV.
df = pd.read_csv(filepath_or_buffer="./data/hackplay_warszawa_with_districts.csv")

In [54]:
# Width of the time bucket that start timestamps are floored to (pandas offset alias).
slot = "15min"
# Districts observed in fewer distinct time slots than this are excluded from scoring.
min_slots_per_district = 50
# Quantile bounds used to clip district values before rescaling them to 0-100.
lower_q = 0.05
upper_q = 0.95

In [57]:
# Parse timestamps in place; values that cannot be parsed become NaT.
df["start_dttm"] = pd.to_datetime(df["start_dttm"], errors="coerce")
# Keep only rows with a valid timestamp and bucket each one into its `slot`-wide window.
tmp = (
    df.dropna(subset=["start_dttm"])
      .assign(time_slot=lambda frame: frame["start_dttm"].dt.floor(slot))
)

          district  rows  social_life_score
0             wola   374              100.0
1   śródmieście   361               99.1
2   praga południe   285               93.4
3         ursynów   275               92.6
4         mokotów   269               92.1
5       białołęka   256               90.9
6            wawer   208               85.9
7          bielany   177               82.0
8           ochota   161               79.7
9           bemowo   147               77.5
10   praga północ   119               72.4
11       targówek   114               71.4
12          włochy    99               68.0
13       żoliborz    67               58.7
14           ursus    65               58.0
15        wilanów    61               56.5
16          wesoła    37               44.6
17      rembertów     5                0.0


In [58]:
# Count distinct users in every (district, cell_rk, time_slot) bucket.
g = tmp.groupby(["district", "cell_rk", "time_slot"], as_index=False).agg(
    n_users=("user_id", "nunique")
)
# Co-presence = number of unordered user pairs in the bucket: n * (n - 1) / 2.
n_users = g["n_users"]
g["co_presence"] = n_users * (n_users - 1) / 2

In [59]:
def winsorize_cell(s: pd.Series, lq=0.01, uq=0.99):
    """Clip a series to its own quantile range (winsorization).

    Parameters
    ----------
    s : pd.Series
        Values to clip.
    lq, uq : float
        Lower and upper quantile levels; values below the `lq` quantile are
        raised to it and values above the `uq` quantile are lowered to it.

    Returns
    -------
    pd.Series
        A clipped copy of `s` with the same index.
    """
    lower_bound, upper_bound = (s.quantile(level) for level in (lq, uq))
    return s.clip(lower=lower_bound, upper=upper_bound)

In [60]:
# Winsorize co-presence separately within each (district, cell_rk) group so that
# a few extreme time slots do not dominate that cell's statistics.
per_cell_copres = g.groupby(["district", "cell_rk"])["co_presence"]
g["co_presence_w"] = per_cell_copres.transform(winsorize_cell)


In [61]:
# Median winsorized co-presence per (district, cell_rk) across all of its time slots.
cell_medians = g.groupby(["district", "cell_rk"], as_index=False).agg(
    cell_median_copres=("co_presence_w", "median")
)


In [62]:
by_district = cell_medians.groupby("district")

# Number of distinct cells that contributed a median in each district.
district_cells = by_district["cell_rk"].nunique().rename("n_active_cells")

# District-level value: the median of its per-cell medians.
median_per_district = (
    by_district["cell_median_copres"]
    .median()
    .rename("median_copres_per_cell")
    .reset_index()
)
district_score = median_per_district.merge(district_cells, on="district", how="left")

# Scale the district median by the square root of its active-cell count.
district_score["normalized_copres"] = (
    np.sqrt(district_score["n_active_cells"]) * district_score["median_copres_per_cell"]
)


In [63]:
# Count the distinct time slots observed in each district; districts with fewer than
# `min_slots_per_district` slots are removed from the ranking.
slots_per_district = g.groupby("district")["time_slot"].nunique().rename("n_slots")
district_score = (
    district_score
    # Drop any `n_slots` column left by a previous run of this cell. Without this,
    # re-running the cell merges the column twice (n_slots_x / n_slots_y) and the
    # filter below raises KeyError.
    .drop(columns="n_slots", errors="ignore")
    .merge(slots_per_district, on="district", how="left")
)
district_score = district_score[district_score["n_slots"] >= min_slots_per_district].copy()

In [64]:
# Rescale the normalized co-presence to 0-100 between its lower_q and upper_q
# quantiles; values outside that range are clipped to the bounds first.
vals = district_score["normalized_copres"]
lo, hi = (vals.quantile(level) for level in (lower_q, upper_q))
if hi != lo:
    scaled = (vals.clip(lo, hi) - lo) / (hi - lo) * 100
    district_score["social_life_score"] = scaled.round(1)
else:
    # Both quantile bounds coincide, so every district gets the top score.
    district_score["social_life_score"] = 100.0

In [65]:
# Final ranking table: selected columns, highest score first, fresh 0..n-1 index.
output_columns = [
    "district",
    "social_life_score",
    "median_copres_per_cell",
    "n_active_cells",
    "n_slots",
]
social_life = district_score[output_columns].sort_values(
    by="social_life_score", ascending=False, ignore_index=True
)

In [None]:
# Write the ranking to disk without the positional index, then preview the first ten rows.
social_life.to_csv(path_or_buf="warsaw_social_life_scores.csv", index=False)

social_life.iloc[:10]