# GPU Monitoring
...

## Load Data into Dataframes

In [5]:
%pip install pandas


Collecting pandas
  Using cached pandas-2.2.0-cp311-cp311-macosx_10_9_x86_64.whl (12.5 MB)
Collecting pytz>=2020.1 (from pandas)
  Using cached pytz-2024.1-py2.py3-none-any.whl (505 kB)
Collecting tzdata>=2022.7 (from pandas)
  Using cached tzdata-2023.4-py2.py3-none-any.whl (346 kB)
Installing collected packages: pytz, tzdata, pandas
Successfully installed pandas-2.2.0 pytz-2024.1 tzdata-2023.4

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.1.2[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3.11 -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [10]:
import sqlite3
import pandas as pd

connection = sqlite3.connect("data/gpu_monitor.db")

gpu_infos: pd.DataFrame = pd.read_sql_query("SELECT * FROM 'gpu_infos';", connection)
process_infos: pd.DataFrame = pd.read_sql_query("SELECT * FROM 'process_infos';", connection)

connection.close()

In [33]:
from datetime import datetime

def split_process_infos(text: str) -> pd.Series:
    components = text.strip().split()
    cpu_percentage = float(components[0])
    memory_percentage = float(components[1])
    user = components[2]
    created_at = " ".join(components[4:7])
    cmd = " ".join(components[7:])
    return pd.Series([cpu_percentage, memory_percentage, user, created_at, cmd])

process_infos[["cpu_percentage", "memory_percentage", "user", "created_at", "cmd"]] = process_infos["pid_info"].apply(split_process_infos)

In [49]:
gpu_infos.head()

Unnamed: 0,pid,gpu_memory,host_id,timestamp
0,4164634,1711 MiB,teach2,2024-01-29 13:37:27
1,441525,1887 MiB,teach2,2024-01-29 13:37:27
2,1823085,80890 MiB,teach3,2024-01-29 13:37:28
3,1823085,30566 MiB,teach3,2024-01-29 13:37:28
4,4164634,1711 MiB,teach2,2024-01-29 13:44:56


In [50]:
process_infos.head()

Unnamed: 0,pid,pid_info,host_id,timestamp,cpu_percentage,memory_percentage,user,created_at,cmd
0,4164634,8.9 1.6 root Ssl Jan 18 271:42 /usr/loc...,teach2,2024-01-29 13:37:27,8.9,1.6,root,Jan 18 271:42,/usr/local/bin/python -m ipykernel_launcher -f...
1,441525,0.8 13.1 root Ssl Jan 26 609:36 /usr/loc...,teach2,2024-01-29 13:37:27,0.8,13.1,root,Jan 26 609:36,/usr/local/bin/python -m ipykernel_launcher -f...
2,1823085,0.9 55.5 joerg Ssl 13:00 20:45 /usr/bin...,teach3,2024-01-29 13:37:28,0.9,55.5,joerg,13:00 20:45 /usr/bin/python3,-m ipykernel_launcher -f /root/.local/share/ju...
3,1823085,0.9 55.5 joerg Ssl 13:00 20:45 /usr/bin...,teach3,2024-01-29 13:37:28,0.9,55.5,joerg,13:00 20:45 /usr/bin/python3,-m ipykernel_launcher -f /root/.local/share/ju...
4,4164634,8.9 1.6 root Ssl Jan 18 271:42 /usr/loc...,teach2,2024-01-29 13:44:56,8.9,1.6,root,Jan 18 271:42,/usr/local/bin/python -m ipykernel_launcher -f...


In [None]:
# TODO: Parse types of timestamp and floats

## Analyse Data
Interesting analysis questions include:
* In which context do gpu intensive programs run?
* How is gpu usage distributed? (visualize over time)
* Which resources do gpu-using processes also use? (memory, cpu)
* ¬¬¬¬¬¬