### Initial setup

Edit the *players.db* path below so that it points to your actual database location.

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import sqlite3
# Open the player database through a SQLite URI; the `mode=ro` query
# parameter requests a read-only connection.
conn = sqlite3.connect('file:/var/lib/tf2-surveilance/players.db?mode=ro', uri=True) 

In [None]:
def format_duration(seconds):
    """Render a duration in seconds as a zero-padded ``HHh MMm SSs`` string.

    Each component is truncated with ``int()``; hours are not wrapped at 24.
    """
    whole_hours, leftover = divmod(seconds, 3600)
    whole_minutes, leftover_seconds = divmod(leftover, 60)
    parts = (int(whole_hours), int(whole_minutes), int(leftover_seconds))
    return "{:02d}h {:02d}m {:02d}s".format(*parts)

def calculate_points(events_df, session_start, session_end, reset_threshold=1):
    """Sum the points recorded for one session, accounting for score resets.

    Each event's ``event_data`` is parsed with ``int()`` and treated as a
    running score (presumably the in-game scoreboard value -- confirm against
    the collector). Whenever the score drops to ``reset_threshold`` or below,
    the last value seen before the drop is added to the total and counting
    starts over. The last value still pending at the end is added as well.

    Args:
        events_df: DataFrame with at least ``created_at`` and ``event_data``
            columns.
        session_start: Lower bound for ``created_at`` (inclusive).
        session_end: Upper bound for ``created_at`` (inclusive).
        reset_threshold: Scores at or below this value count as a reset.
            The default of 1 tolerates a reset being logged one point late.

    Returns:
        The accumulated points as an int; 0 when no event falls in the window.
    """
    in_session = events_df[
        (events_df['created_at'] >= session_start)
        & (events_df['created_at'] <= session_end)
    ]
    # Only the score column is needed once the events are in time order.
    ordered_scores = in_session.sort_values(by='created_at')['event_data']

    total_points = 0
    last_points = 0
    for raw_score in ordered_scores:
        current_points = int(raw_score)
        if current_points <= reset_threshold:
            # Score was reset: bank what had been reached before the reset.
            total_points += last_points
            last_points = 0
        else:
            last_points = current_points

    # Bank the score that was still running when the window ended.
    total_points += last_points
    return total_points

def player_sessions_by_name(player_name, conn):
    """Load all sessions of the player(s) named *player_name* with point stats.

    Args:
        player_name: Exact value matched against ``players.name``.
        conn: Open database connection passed to ``pandas.read_sql_query``.

    Returns:
        A DataFrame with one row per session holding the queried session,
        player and server columns plus three derived columns:
        ``total_points`` (from ``calculate_points``),
        ``avg_points_per_minute`` (0 when ``duration`` is not positive) and
        ``formatted_duration`` (from ``format_duration``). The frame is empty
        when no player has that name.
    """
    session_query = """
    SELECT  s.session_id, s.player_id, s.server_id, s.score AS session_score, s.duration, s.joined_at, s.left_at, p.name AS player_name, srv.address AS server_address
    FROM sessions AS s
    JOIN players AS p ON s.player_id = p.player_id
    JOIN servers AS srv ON s.server_id = srv.server_id
    WHERE p.name = ?
    """
    sessions_df = pd.read_sql_query(session_query, conn, params=[player_name])

    # Only events of the matching player(s) are ever used below, so let the
    # database filter them instead of loading every 'point change' row.
    events_query = """
    SELECT  player_id,  server_id,  event_data, created_at
    FROM player_events
    WHERE event_type = 'point change'
      AND player_id IN (SELECT player_id FROM players WHERE name = ?)
    """
    events_df = pd.read_sql_query(events_query, conn, params=[player_name])

    # Split the events once per (player, server) pair rather than masking the
    # whole frame again for every session.
    events_by_key = {
        key: group
        for key, group in events_df.groupby(['player_id', 'server_id'])
    }
    no_events = events_df.iloc[0:0]

    # Plain list comprehensions keep the assignments valid for an empty
    # result, where DataFrame.apply(axis=1) would hand back a DataFrame.
    sessions_df['total_points'] = [
        calculate_points(
            events_by_key.get((player_id, server_id), no_events),
            joined_at,
            left_at,
        )
        for player_id, server_id, joined_at, left_at in zip(
            sessions_df['player_id'],
            sessions_df['server_id'],
            sessions_df['joined_at'],
            sessions_df['left_at'],
        )
    ]

    sessions_df['avg_points_per_minute'] = [
        points / (duration / 60) if duration > 0 else 0
        for points, duration in zip(sessions_df['total_points'], sessions_df['duration'])
    ]

    sessions_df["formatted_duration"] = sessions_df["duration"].apply(format_duration)

    return sessions_df

# TF2 Surveillance war room

With the building blocks provided above, it is relatively easy to write snippets of code that graph data about specific targets or give other insight into the collected data. Some examples are provided below.

Most will require slight edits, e.g. pointing them at the correct files.

Many modules rely on the data loaded by previous modules.

In [None]:
# Read all sessions from a set group of servers

# Many other modules rely on this one being run first

# One server address per line. Blank lines are skipped so they do not end up
# as empty-string entries in the IN (...) list below.
with open('<target server file goes here>', 'r') as file:
    filter_server_addresses = [line.strip() for line in file if line.strip()]

# Build the SQL query for sessions, dynamically adding a WHERE clause if server addresses are provided
session_query = """
SELECT  s.session_id, s.player_id, s.server_id, s.score AS session_score, s.duration, s.joined_at, s.left_at, p.name AS player_name, srv.address AS server_address
FROM sessions AS s
JOIN players AS p ON s.player_id = p.player_id
JOIN servers AS srv ON s.server_id = srv.server_id
"""

if filter_server_addresses:
    # One '?' per address keeps the values parameterized.
    placeholders = ', '.join(['?'] * len(filter_server_addresses))
    session_query += f"WHERE srv.address IN ({placeholders})"

all_sessions_df = pd.read_sql_query(session_query, conn, params=filter_server_addresses or None)

# Fetch player events data with the event type 'point change'
events_query = """
SELECT  player_id,  server_id,  event_data, created_at
FROM player_events
WHERE event_type = 'point change'
"""
events_df = pd.read_sql_query(events_query, conn)

# Split the events once per (player, server) pair; masking the full events
# table again for every session scales with sessions x events.
events_by_key = {
    key: group
    for key, group in events_df.groupby(['player_id', 'server_id'])
}
no_events = events_df.iloc[0:0]

# Calculate points for each session. List comprehensions (rather than
# DataFrame.apply(axis=1)) keep the assignment valid when no session matched.
all_sessions_df['total_points'] = [
    calculate_points(
        events_by_key.get((player_id, server_id), no_events),
        joined_at,
        left_at,
    )
    for player_id, server_id, joined_at, left_at in zip(
        all_sessions_df['player_id'],
        all_sessions_df['server_id'],
        all_sessions_df['joined_at'],
        all_sessions_df['left_at'],
    )
]

# Points per minute of session time; sessions without a positive duration get 0
all_sessions_df['avg_points_per_minute'] = [
    points / (duration / 60) if duration > 0 else 0
    for points, duration in zip(all_sessions_df['total_points'], all_sessions_df['duration'])
]

all_sessions_df["duration_formatted"] = all_sessions_df["duration"].apply(format_duration)
print(all_sessions_df.shape)
all_sessions_df.head()

In [None]:
# Collapse all sessions by player

# Needs all_sessions_df from the session-loading module.
# Produces one row per (player_id, player_name) pair that combines
# all of that player's sessions.

player_totals = {
    'total_duration': pd.NamedAgg(column='duration', aggfunc='sum'),
    'total_points': pd.NamedAgg(column='total_points', aggfunc='sum'),
    # Unweighted mean of the per-session averages
    'avg_points_per_minute': pd.NamedAgg(column='avg_points_per_minute', aggfunc='mean'),
}
grouped_df = (
    all_sessions_df
    .groupby(['player_id', 'player_name'])
    .agg(**player_totals)
    .reset_index()
)

# Replace the summed seconds with their human-readable form
grouped_df["total_duration"] = grouped_df["total_duration"].map(format_duration)

grouped_df.head()

In [None]:
# Graph instantaneous player count

# Needs all_sessions_df to be populated with session data already.
# The join/leave columns are parsed to datetimes in place.
for time_column in ('joined_at', 'left_at'):
    all_sessions_df[time_column] = pd.to_datetime(all_sessions_df[time_column])

# Every session contributes a +1 event when it starts ...
join_events = (
    all_sessions_df[['joined_at', 'player_id']]
    .rename(columns={'joined_at': 'timestamp'})
    .assign(event=1)
)
# ... and a -1 event when it ends
leave_events = (
    all_sessions_df[['left_at', 'player_id']]
    .rename(columns={'left_at': 'timestamp'})
    .assign(event=-1)
)

# Put both kinds of events on one timeline
all_events = pd.concat([join_events, leave_events]).sort_values(by='timestamp')

# The running total of the +1/-1 events is the number of players present
all_events['cumulative_players'] = all_events['event'].cumsum()

import matplotlib.dates as mdates

# Line plot of the player count over time
plt.figure(figsize=(12, 6))
plt.plot(all_events['timestamp'], all_events['cumulative_players'], 'bo-', markersize=0)
plt.title('Instantaneous Player Count Over Time')
plt.xlabel('Time')
plt.ylabel('Number of Players')

# One labelled tick per calendar day on the x-axis
date_axis = plt.gca().xaxis
date_axis.set_major_locator(mdates.DayLocator(interval=1))
date_axis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d (%a)'))
plt.gcf().autofmt_xdate()

plt.xticks(rotation=45)

plt.grid(True)
plt.tight_layout()
plt.show()

In [None]:
# Grab player sessions by a players name

player_name = '<player>'

sessions_df = player_sessions_by_name(player_name, conn)

print("Target: ", player_name, ", First Seen: ", sessions_df['joined_at'].min())

points_rate = sessions_df['avg_points_per_minute']
avg_points_per_minute = points_rate.mean()
max_points_per_minute = points_rate.max()
print("Average Points per Minute: ", avg_points_per_minute, " Max: ", max_points_per_minute)

# Work on a copy so sessions_df keeps its original joined_at values
temp_df = sessions_df.copy()
temp_df['joined_at'] = pd.to_datetime(temp_df['joined_at'])
temp_df['date'] = temp_df['joined_at'].dt.date

# Every calendar day between the first and the last session, as a DataFrame
start_date, end_date = temp_df['date'].min(), temp_df['date'].max()
all_dates = pd.date_range(start=start_date, end=end_date, freq='D').date
all_dates_df = pd.DataFrame(all_dates, columns=['date'])

# Total seconds played per day; days without any session are filled with 0
duration_per_date = temp_df.groupby('date')['duration'].sum().reset_index()
merged_df = pd.merge(all_dates_df, duration_per_date, on='date', how='left')
merged_df['duration'] = merged_df['duration'].fillna(0)
daily_playtime = merged_df['duration']

session_lengths = sessions_df['duration']
avg_session_length = session_lengths.mean()
max_session_length = session_lengths.max()
avg_session_length_per_day = daily_playtime.mean()
max_session_length_per_day = daily_playtime.max()
total_session_length = session_lengths.sum()
print(
    "Average session length: ", format_duration(avg_session_length),
    " Max: ", format_duration(max_session_length),
    " Avg/day: ", format_duration(avg_session_length_per_day),
    " Max/day: ", format_duration(max_session_length_per_day),
    " Total/Lifetime: ", format_duration(total_session_length),
)

print(sessions_df.shape)

sessions_df.head()

In [None]:
# Graph players average playtime during each day of the week

# Uses sessions_df and player_name from the player-sessions module above

# Convert 'joined_at' to datetime and extract day of the week (0 = Monday)
sessions_df['day_of_week'] = pd.to_datetime(sessions_df['joined_at']).dt.dayofweek

# Mean session duration for each day of the week on which a session started
average_duration_per_day = sessions_df.groupby('day_of_week')['duration'].mean()

# Convert seconds to hours
average_duration_per_day_hours = average_duration_per_day / 3600

# Expand to all seven weekdays, using 0 for days without any session.
# reindex keeps the data as floats throughout, instead of writing floats
# into an integer Series of zeros.
all_days_series = average_duration_per_day_hours.reindex(range(7), fill_value=0.0)

# Rename index to actual day names
days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
all_days_series.index = days

# Plotting the bar graph
plt.figure(figsize=(6, 4))
all_days_series.plot(kind='bar')
plt.title('Average Play Duration Per Day of the Week ('+player_name+')')
plt.xlabel('Day of the Week')
plt.ylabel('Average Duration (Hours)')
plt.xticks(rotation=45)
plt.show()

In [None]:
# Graph of player activities over time

# multiple players are supported and will be stacked
# from top to bottom on the graph on each day

import matplotlib.dates as mdates
from datetime import timedelta

# List of player names
player_names = ['<player1>','<player2>']  # Add more players as needed

# Dictionary to hold session data for each player
players_sessions = {name: player_sessions_by_name(name, conn) for name in player_names}

# Map every session's join/leave time of day onto one fixed reference date so
# that all days share the same x-axis (only hours and minutes are kept)
base_date = pd.Timestamp(year=2000, month=1, day=1)
for sessions in players_sessions.values():
    sessions['joined_at'] = pd.to_datetime(sessions['joined_at'])
    sessions['left_at'] = pd.to_datetime(sessions['left_at'])
    sessions['normalized_joined_at'] = sessions['joined_at'].apply(lambda x: base_date + timedelta(hours=x.hour, minutes=x.minute))
    sessions['normalized_left_at'] = sessions['left_at'].apply(lambda x: base_date + timedelta(hours=x.hour, minutes=x.minute))

# Creating a color map for each unique server_id
all_sessions = pd.concat(players_sessions.values())
unique_servers = all_sessions['server_id'].unique()
# plt.get_cmap replaces plt.cm.get_cmap, which newer Matplotlib releases removed
colors = plt.get_cmap('hsv', len(unique_servers))
server_color_map = {server_id: colors(i) for i, server_id in enumerate(unique_servers)}

# Creating a plot
fig, ax = plt.subplots(figsize=(18, 6))

# Determine the overall date range to cover all players
start_date = all_sessions['joined_at'].min().normalize()
end_date = all_sessions['left_at'].max().normalize()
date_range = pd.date_range(start=start_date, end=end_date, freq='D')

# Vertical offset of each player's bar inside a day's row: 0.2 apart,
# shifted by 0.1 for an even number of players, first player on top
odd = 0.0
num_players = len(player_names)
if num_players % 2 == 0:
    odd = 0.1
offsets = [((i - num_players // 2) * 0.2) + odd for i in range(num_players)[::-1]]

# Title lists every player followed by a running number
title = "Online Periods by Server, Players: " + "".join(
    f"{player} ({idx}), " for idx, player in enumerate(player_names, start=1)
)

# Plotting sessions for all players
for i, single_date in enumerate(date_range):
    # Iterate the session frames only: a loop variable named player_name here
    # would overwrite the notebook-level player_name used by other modules.
    for player_offset, player_sessions in zip(offsets, players_sessions.values()):
        daily_sessions = player_sessions[player_sessions['joined_at'].dt.normalize() == single_date]
        for _, session in daily_sessions.iterrows():
            color = server_color_map[session['server_id']]
            row = i + player_offset
            if session['joined_at'].date() == session['left_at'].date():
                start_time = mdates.date2num(session['normalized_joined_at'])
                end_time = mdates.date2num(session['normalized_left_at'])
                ax.plot([start_time, end_time], [row, row], color=color, linewidth=6)
            else:
                # Session ends on a different date than it started: draw it up
                # to the end of this day's row and continue from the start of
                # the next day's row
                end_of_day = base_date + timedelta(hours=23, minutes=59, seconds=59)
                start_of_next_day = base_date
                ax.plot([mdates.date2num(session['normalized_joined_at']), mdates.date2num(end_of_day)],
                        [row, row], color=color, linewidth=6)
                ax.plot([mdates.date2num(start_of_next_day), mdates.date2num(session['normalized_left_at'])],
                        [row + 1, row + 1], color=color, linewidth=6)

# Formatting the plot: one y tick per date, hourly x ticks over a single day
ax.set_yticks(range(len(date_range)))
ax.set_yticklabels([d.strftime('%Y-%m-%d (%a)') for d in date_range])
ax.xaxis_date()
date_format = mdates.DateFormatter('%H:%M')
ax.xaxis.set_major_formatter(date_format)
plt.gca().xaxis.set_major_locator(mdates.HourLocator(interval=1))

ax.set_xlim(base_date, base_date + timedelta(hours=23, minutes=59))

plt.title(title)
plt.xlabel('Time of Day')
plt.ylabel('Date')
plt.grid(True)
plt.tight_layout()
plt.show()
