# Data Processing Notebook for ISF-2026 BPM data

## Adding Event Markers

### Step 1: Load data and keep relevant columns

Keep Time, ConnectedNodes, and all Node columns.

In [5]:
import pandas as pd
from pathlib import Path

# Read the raw BPM export from the working directory.
data_path = Path("ISF-2026-BPM-Data.csv")
df = pd.read_csv(data_path)

# Two bookkeeping columns come first, followed by every per-node reading
# column in the order it appears in the file.
base_cols = ["Time", "ConnectedNodes"]
node_cols = list(filter(lambda name: name.startswith("Node"), df.columns))
keep_cols = [*base_cols, *node_cols]

# Everything not listed in keep_cols is discarded here.
df = df.loc[:, keep_cols]
df.head()

Unnamed: 0,Time,ConnectedNodes,Node01_Person1,Node01_Person2,Node01_Person3,Node02_Person1,Node02_Person2,Node02_Person3,Node03_Person1,Node03_Person2,...,Node17_Person3,Node18_Person1,Node18_Person2,Node18_Person3,Node19_Person1,Node19_Person2,Node19_Person3,Node20_Person1,Node20_Person2,Node20_Person3
0,14:43:26,1,255,255,255,255,255,255,255,255,...,255,108,99,114,255,255,255,255,255,255
1,14:43:26,2,255,255,255,255,255,255,255,255,...,255,99,89,114,255,255,255,255,255,255
2,14:43:26,2,255,255,255,255,255,255,255,255,...,255,106,88,93,255,255,255,255,255,255
3,14:43:26,2,255,255,255,255,255,255,255,255,...,255,101,80,91,255,255,255,255,255,255
4,14:43:32,2,255,255,255,255,255,255,255,255,...,255,139,90,123,255,255,255,255,255,255


### Step 2: Load event markers

Load event markers and prepare for merging.

In [6]:
events_path = Path("ISF-Event-Markers.csv")
events_df = pd.read_csv(events_path)

# In a single selection: discard the "Collective BPM" rows and retain only the
# timestamp and event-name columns, then rename the timestamp column so it
# carries the same name ("Time") as the timestamp column of the BPM data.
events_df = (
    events_df
    .loc[events_df["Event"].ne("Collective BPM"), ["Time(IST)", "Event"]]
    .rename(columns={"Time(IST)": "Time"})
)
events_df.head()

Unnamed: 0,Time,Event
0,14:57:53,Heart Meter
1,14:58:29,Breathing Calibration
2,15:02:38,First Portrait
3,15:03:23,First performance
4,15:04:44,First freeze frame


### Step 3: Merge events into main data

Add an Event column by matching on Time. When several rows share a timestamp, only the first of them carries the event; every other row is filled with "Nil".

In [7]:
# Attach event markers to the BPM rows by exact match on the Time string.

# Make the cell safe to re-run: merging into a frame that already has an
# Event column would produce Event_x / Event_y instead of a single Event.
df = df.drop(columns=["Event"], errors="ignore")

# Keep one marker per timestamp. A repeated Time in the markers table would
# otherwise duplicate BPM rows during the merge. The first marker wins, which
# mirrors the "first occurrence" rule applied to the data rows below.
events_unique = events_df.drop_duplicates(subset=["Time"], keep="first")

# Markers are matched on the exact Time value, so a marker whose timestamp has
# no BPM row is not attached anywhere. Report those instead of losing them
# silently.
unmatched = events_unique[~events_unique["Time"].isin(df["Time"])]
if not unmatched.empty:
    print(f"Warning: {len(unmatched)} event marker(s) have no matching Time in the data:")
    print(unmatched.to_string(index=False))

# Left merge keeps every BPM row; validate guards the one-marker-per-Time rule.
df = df.merge(events_unique, on="Time", how="left", validate="many_to_one")

# Only the first row of each timestamp keeps its event; later rows with the
# same Time, and rows without any marker, are set to "Nil".
first_at_time = ~df.duplicated(subset=["Time"], keep="first")
df["Event"] = df["Event"].where(first_at_time).fillna("Nil")

# Reorder columns: Time, ConnectedNodes, Event, then all Node columns
node_cols = [c for c in df.columns if c.startswith("Node")]
df = df[["Time", "ConnectedNodes", "Event"] + node_cols]

df.head(10)

Unnamed: 0,Time,ConnectedNodes,Event,Node01_Person1,Node01_Person2,Node01_Person3,Node02_Person1,Node02_Person2,Node02_Person3,Node03_Person1,...,Node17_Person3,Node18_Person1,Node18_Person2,Node18_Person3,Node19_Person1,Node19_Person2,Node19_Person3,Node20_Person1,Node20_Person2,Node20_Person3
0,14:43:26,1,Nil,255,255,255,255,255,255,255,...,255,108,99,114,255,255,255,255,255,255
1,14:43:26,2,Nil,255,255,255,255,255,255,255,...,255,99,89,114,255,255,255,255,255,255
2,14:43:26,2,Nil,255,255,255,255,255,255,255,...,255,106,88,93,255,255,255,255,255,255
3,14:43:26,2,Nil,255,255,255,255,255,255,255,...,255,101,80,91,255,255,255,255,255,255
4,14:43:32,2,Nil,255,255,255,255,255,255,255,...,255,139,90,123,255,255,255,255,255,255
5,14:43:33,2,Nil,255,255,255,255,255,255,255,...,255,157,103,168,255,255,255,255,255,255
6,14:43:33,2,Nil,255,255,255,255,255,255,255,...,255,130,101,171,255,255,255,255,255,255
7,14:43:35,0,Nil,255,255,255,255,255,255,255,...,255,255,255,255,255,255,255,255,255,255
8,14:43:36,2,Nil,255,255,255,255,255,255,255,...,255,118,137,189,255,255,255,255,255,255
9,14:43:37,2,Nil,255,255,255,255,255,255,255,...,255,122,131,184,255,255,255,255,255,255


### Step 4: Save raw data

Export to CSV file.

In [8]:
# Destination for the merged, unfiltered data.
output_path = Path("ISF-2026-BPM-Data-Raw-with-events.csv")

# The row index is not written to the file.
df.to_csv(path_or_buf=output_path, index=False)

print(f"Saved {len(df)} rows to {output_path}")
output_path

Saved 5002 rows to ISF-2026-BPM-Data-Raw-with-events.csv


PosixPath('ISF-2026-BPM-Data-Raw-with-events.csv')

## Filtering raw data

### Step 1: Create a copy 

In [9]:
# Take two independent copies of the merged frame: df_raw is kept as the
# untouched reference, df_processed is the working copy for the steps below.
df_raw, df_processed = df.copy(), df.copy()

print(f"Raw data: {len(df_raw)} rows")
print(f"Processing data: {len(df_processed)} rows")

Raw data: 5002 rows
Processing data: 5002 rows


### Step 2: Remove Node14
Drop Node14 columns (disconnected node).

In [10]:
# Collect every column whose name begins with "Node14" and remove them from
# the working copy. If none are present, nothing is dropped.
node14_cols = [name for name in df_processed.columns if name.startswith("Node14")]
df_processed = df_processed.drop(node14_cols, axis="columns")
df_processed.head()

Unnamed: 0,Time,ConnectedNodes,Event,Node01_Person1,Node01_Person2,Node01_Person3,Node02_Person1,Node02_Person2,Node02_Person3,Node03_Person1,...,Node17_Person3,Node18_Person1,Node18_Person2,Node18_Person3,Node19_Person1,Node19_Person2,Node19_Person3,Node20_Person1,Node20_Person2,Node20_Person3
0,14:43:26,1,Nil,255,255,255,255,255,255,255,...,255,108,99,114,255,255,255,255,255,255
1,14:43:26,2,Nil,255,255,255,255,255,255,255,...,255,99,89,114,255,255,255,255,255,255
2,14:43:26,2,Nil,255,255,255,255,255,255,255,...,255,106,88,93,255,255,255,255,255,255
3,14:43:26,2,Nil,255,255,255,255,255,255,255,...,255,101,80,91,255,255,255,255,255,255
4,14:43:32,2,Nil,255,255,255,255,255,255,255,...,255,139,90,123,255,255,255,255,255,255


### Step 3: Filter rows

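Keep only rows where at least two nodes are connected (remove rows where ConnectedNodes < 2). Note that a removed row takes any event marker attached to it along with it.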

In [11]:
# Keep only rows where at least two nodes are connected.
keep_mask = df_processed["ConnectedNodes"] >= 2

# An event marker lives on a single row (the first row of its timestamp).
# If that row fails the filter, the marker disappears from the processed
# data, so report any markers that are about to be removed.
if "Event" in df_processed.columns:
    dropped_events = df_processed.loc[
        ~keep_mask & (df_processed["Event"] != "Nil"), ["Time", "Event"]
    ]
    if not dropped_events.empty:
        print(
            f"Warning: {len(dropped_events)} event marker(s) are on rows with "
            "ConnectedNodes < 2 and are removed by this filter:"
        )
        print(dropped_events.to_string(index=False))

# Boolean indexing yields a new frame; renumber its rows from 0 without
# mutating in place so the cell can be re-run safely.
df_processed = df_processed[keep_mask].reset_index(drop=True)
df_processed.head()

Unnamed: 0,Time,ConnectedNodes,Event,Node01_Person1,Node01_Person2,Node01_Person3,Node02_Person1,Node02_Person2,Node02_Person3,Node03_Person1,...,Node17_Person3,Node18_Person1,Node18_Person2,Node18_Person3,Node19_Person1,Node19_Person2,Node19_Person3,Node20_Person1,Node20_Person2,Node20_Person3
0,14:43:26,2,Nil,255,255,255,255,255,255,255,...,255,99,89,114,255,255,255,255,255,255
1,14:43:26,2,Nil,255,255,255,255,255,255,255,...,255,106,88,93,255,255,255,255,255,255
2,14:43:26,2,Nil,255,255,255,255,255,255,255,...,255,101,80,91,255,255,255,255,255,255
3,14:43:32,2,Nil,255,255,255,255,255,255,255,...,255,139,90,123,255,255,255,255,255,255
4,14:43:33,2,Nil,255,255,255,255,255,255,255,...,255,157,103,168,255,255,255,255,255,255


### Step 4: Save cleaned data

Export to CSV file.

In [12]:
# Destination for the filtered data (Node14 removed, ConnectedNodes >= 2).
output_path = Path("ISF-2026-BPM-Data-Processed-with-events.csv")

# The row index is not written to the file.
df_processed.to_csv(path_or_buf=output_path, index=False)

print(f"Saved {len(df_processed)} rows to {output_path}")
output_path

Saved 4918 rows to ISF-2026-BPM-Data-Processed-with-events.csv


PosixPath('ISF-2026-BPM-Data-Processed-with-events.csv')