# Docker Stats Visualization

This notebook reads the `test_stats.csv` generated by our Docker profiling script and visualizes CPU and memory usage to help us understand Kubernetes resource demands.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline
# Default figure size (width, height) in inches; the plotting cells create
# their figures without an explicit size, so they all pick this up.
plt.rcParams['figure.figsize'] = (14, 6)

In [None]:
# Load the data
# `names=` is passed without `header=`, so the first line of the file is read
# as data; the CSV is expected to have no header row.
columns = ['Timestamp', 'Container', 'CPU(%)', 'MemUsage', 'MemLimit', 'MemPerc', 'NetInput', 'NetOutput', 'BlockInput', 'BlockOutput', 'PIDs']
df = pd.read_csv('../test_stats.csv', names=columns)


def _percent_to_float(series):
    """Convert a column of percentage strings such as '12.5%' to floats.

    Values are cast to ``str`` first so the helper also works when pandas has
    already parsed the column as numeric (the ``.str`` accessor would
    otherwise be unavailable). The '%' sign is removed as a literal character
    (``regex=False``) so the result does not depend on the pandas version's
    default for ``regex``.

    Entries that are not numeric after cleaning (for example a '--'
    placeholder or a missing value) become NaN instead of raising, so one bad
    row does not abort the whole notebook.

    Parameters
    ----------
    series : pandas.Series
        Raw column values.

    Returns
    -------
    pandas.Series
        float64 series of the same length and index.
    """
    cleaned = series.astype(str).str.replace('%', '', regex=False).str.strip()
    return pd.to_numeric(cleaned, errors='coerce').astype(float)


# Clean up formatting for percentages and convert to float
df['Timestamp'] = pd.to_datetime(df['Timestamp'])
df['CPU(%)'] = _percent_to_float(df['CPU(%)'])
df['MemPerc'] = _percent_to_float(df['MemPerc'])
# Helper function to convert memory units to MB for consistent plotting
def convert_to_mb(val_str):
    """Convert a size string such as '1.5GiB' or '512KiB' to megabytes.

    Binary (KiB/MiB/GiB/TiB) and decimal (kB/MB/GB/TB) prefixes are placed on
    the same 1024-based scale, which is how this helper has always treated
    'MB' and 'kB'; the result is therefore approximate for decimal units.
    Unit matching is case-insensitive and surrounding whitespace is ignored.

    Parameters
    ----------
    val_str : str or missing value
        Number immediately followed by a unit suffix, e.g. '256MiB'.
        Anything for which ``pd.isna`` is true counts as missing.

    Returns
    -------
    float
        Size in MB. ``0.0`` for missing values and for strings that carry no
        recognised unit suffix (e.g. '--').

    Raises
    ------
    ValueError
        If a unit suffix is recognised but the text in front of it is not a
        valid number.
    """
    if pd.isna(val_str):
        return 0.0

    text = str(val_str).strip()
    lowered = text.lower()

    # Ordered longest-suffix-first: every unit ends in 'b', so the bare byte
    # suffix must be tried last or it would swallow 'KiB', 'GB', etc. and
    # leave a non-numeric remainder such as '512Ki'.
    # All factors are powers of two, so multiplying is exact.
    unit_to_mb = (
        ('tib', 1024.0 * 1024.0),
        ('gib', 1024.0),
        ('mib', 1.0),
        ('kib', 1.0 / 1024.0),
        ('tb', 1024.0 * 1024.0),
        ('gb', 1024.0),
        ('mb', 1.0),
        ('kb', 1.0 / 1024.0),
        ('b', 1.0 / (1024.0 * 1024.0)),
    )
    for suffix, factor in unit_to_mb:
        if lowered.endswith(suffix):
            return float(text[:-len(suffix)]) * factor

    # No recognised unit: keep the original best-effort behaviour.
    return 0.0

# Derive a numeric memory column (in MB) by running every raw 'MemUsage'
# value through convert_to_mb, one element at a time.
df['MemUsage_MB'] = df['MemUsage'].map(convert_to_mb)

# Preview the first rows as the cell output.
df.head()

In [None]:
# Plot CPU Usage Over Time
# One line per container (hue), drawn on an explicitly created Axes.
fig, ax = plt.subplots()
sns.lineplot(x='Timestamp', y='CPU(%)', hue='Container', data=df, ax=ax)
ax.set(title='CPU Usage Over Time (%)', xlabel='Time', ylabel='CPU Usage (%)')
# Tilt the time labels so neighbouring ticks do not overlap.
ax.tick_params(axis='x', labelrotation=45)
fig.tight_layout()
plt.show()

In [None]:
# Plot Memory Usage Over Time
# One line per container (hue), using the MemUsage_MB column as the y value.
fig, ax = plt.subplots()
sns.lineplot(x='Timestamp', y='MemUsage_MB', hue='Container', data=df, ax=ax)
ax.set(title='Memory Usage Over Time (MB)', xlabel='Time', ylabel='Memory Usage (MB)')
# Tilt the time labels so neighbouring ticks do not overlap.
ax.tick_params(axis='x', labelrotation=45)
fig.tight_layout()
plt.show()