# Workload Characterization

## Introduction

### Import relevant modules

In [2]:
# Data Analysis
import pandas as pd

# Data Visualization
from matplotlib import pyplot as plt

# Machine Learning
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import AgglomerativeClustering

# Utilities
import numpy as np
# import warnings

In [3]:
# Set the inline plotting backend's figure format to 'retina' so embedded
# figures are emitted at a higher pixel density (sharper on high-DPI displays).
%config InlineBackend.figure_format = 'retina'

### Read data

#### High-level parameters

In [4]:
# Load the client-side report into a DataFrame and preview its first five rows.
# NOTE(review): the column layout (timeStamp, elapsed, label, ...) looks like a
# load-testing tool's CSV results export -- confirm the producing tool.
df_high = pd.read_csv(filepath_or_buffer='WC_report_client.csv')
df_high.head(n=5)

Unnamed: 0,timeStamp,elapsed,label,responseCode,responseMessage,threadName,dataType,success,failureMessage,bytes,sentBytes,grpThreads,allThreads,URL,Latency,IdleTime,Connect
0,1609259515125,103,HTTP Request - Repubblica Home,200,OK,Thread Group - Slow 1-1,text,True,,394622,142,1,2,http://192.168.1.6/webserver/RepubblicaHome.html,68,0,49
1,1609259515125,128,HTTP Request - Repubblica Home,200,OK,Thread Group - Fast 2-1,text,True,,394622,142,1,2,http://192.168.1.6/webserver/RepubblicaHome.html,69,0,49
2,1609259515420,10,HTTP Request - Wikipedia Home,200,OK,Thread Group - Slow 1-2,text,True,,78492,141,2,4,http://192.168.1.6/webserver/WikipediaHome.html,3,0,0
3,1609259515430,10,HTTP Request - Wikipedia Home,200,OK,Thread Group - Fast 2-2,text,True,,78492,141,2,4,http://192.168.1.6/webserver/WikipediaHome.html,4,0,2
4,1609259515539,15,HTTP Request - Repubblica Page,200,OK,Thread Group - Fast 2-1,text,True,,141098,142,2,4,http://192.168.1.6/webserver/RepubblicaPage.html,3,0,0


In [5]:
# Report the size of the client-side table: column count first, then row count.
print('The dataset contains', len(df_high.columns), 'columns and', len(df_high), 'rows')

The dataset contains 17 columns and 5794 rows


In [6]:
# Sanity check: evaluates to True only when every row's `success` value is truthy.
df_high.loc[:, 'success'].all()

True

#### Low-level parameters

In [7]:
# Load the server-side report and give every column a group-prefixed name
# (procs_*, memory_*, swap_*, io_*, system_*, cpu_*).
#
# - The separator is a regular expression (one or more whitespace characters).
#   It is written as a raw string because '\s' in a plain string literal is an
#   invalid escape sequence, which newer Python versions warn about.
# - skiprows=1 discards the first line of the file, so the second line supplies
#   the column names.
#   NOTE(review): the short names (r, b, swpd, free, ...) look like `vmstat`
#   output whose first line is a group banner -- confirm against the capture.
# - rename() is chained instead of mutating in place, so the cell builds
#   df_low in a single expression.
df_low = (
    pd.read_csv('WC_report_server.csv', sep=r'\s+', skiprows=1)
    .rename(columns={
        'r': 'procs_r', 'b': 'procs_b',
        'swpd': 'memory_swpd', 'free': 'memory_free',
        'buff': 'memory_buff', 'cache': 'memory_cache',
        'si': 'swap_si', 'so': 'swap_so',
        'bi': 'io_bi', 'bo': 'io_bo',
        'in': 'system_in', 'cs': 'system_cs',
        'us': 'cpu_us', 'sy': 'cpu_sy', 'id': 'cpu_id',
        'wa': 'cpu_wa', 'st': 'cpu_st',
    })
)
df_low.head(10)

Unnamed: 0,procs_r,procs_b,memory_swpd,memory_free,memory_buff,memory_cache,swap_si,swap_so,io_bi,io_bo,system_in,system_cs,cpu_us,cpu_sy,cpu_id,cpu_wa,cpu_st
0,0,0,0,2260336,52092,937332,0,0,7,5,122,39,0,0,99,0,0
1,0,0,0,2260336,52092,937332,0,0,0,0,145,96,0,0,100,0,0
2,0,0,0,2260336,52100,937376,0,0,0,24,142,107,0,0,100,0,0
3,0,0,0,2260336,52100,937376,0,0,0,0,130,88,0,0,100,0,0
4,0,0,0,2260336,52100,937376,0,0,0,0,149,98,0,0,100,0,0
5,0,0,0,2260336,52100,937376,0,0,0,0,139,89,0,0,100,0,0
6,0,0,0,2260336,52100,937376,0,0,0,0,145,101,0,0,100,0,0
7,0,0,0,2260336,52100,937376,0,0,0,0,119,79,0,0,100,0,0
8,0,0,0,2260336,52108,937368,0,0,0,12,143,101,0,0,100,0,0
9,1,0,0,2259832,52108,937368,0,0,0,0,293,275,1,1,99,0,0


In [8]:
# Report the size of the server-side table: column count first, then row count.
print('The dataset contains', len(df_low.columns), 'columns and', len(df_low), 'rows')

The dataset contains 17 columns and 310 rows
