In [55]:
import pandas as pd
import numpy as np
import openpyxl

In [56]:
# Load the semicolon-delimited log file. Malformed rows are dropped with a
# warning instead of aborting the load. `on_bad_lines` replaces the
# `error_bad_lines` keyword, which was deprecated in pandas 1.3 and removed in
# pandas 2.0; "warn" is what `error_bad_lines=False` did by default.
df = pd.read_csv("Manual_Log.csv", sep=";", on_bad_lines="warn")
df.head(10)

Unnamed: 0,Time,Server,Message_type,CPU Usage,Memory Usage,Transaction_ID,To_Server,Message
0,0.464448,Client,INFO,0.0,0.0,1,A,Request from customer Transaction ID: 1
1,0.759124,Client,INFO,0.0,0.0,2,A,Request from customer Transaction ID: 2
2,1.30257,A,INFO,0.026,0.896,1,B,Send Authentication request to server B Transa...
3,1.613766,A,INFO,0.031,0.151,2,B,Send Authentication request to server B Transa...
4,3.14493,Client,INFO,0.0,0.0,3,A,Request from customer Transaction ID: 3
5,4.465991,Client,INFO,0.0,0.0,4,A,Request from customer Transaction ID: 4
6,4.541813,B,INFO,0.046,0.792,1,A,Return Authentication request to server A Tran...
7,4.675912,A,INFO,0.926,0.696,1,C,Request Balance to server C Transaction ID: 1 ...
8,5.766717,Client,INFO,0.0,0.0,5,A,Request from customer Transaction ID: 5
9,5.777693,C,INFO,0.868,0.394,1,A,Return Balance to server A Transaction ID: 1 C...


In [57]:
# Drop the Transaction_ID, To_Server and Message columns from the frame.
unused_columns = ["Transaction_ID", "To_Server", "Message"]
df = df.drop(columns=unused_columns)
df.head(10)

Unnamed: 0,Time,Server,Message_type,CPU Usage,Memory Usage
0,0.464448,Client,INFO,0.0,0.0
1,0.759124,Client,INFO,0.0,0.0
2,1.30257,A,INFO,0.026,0.896
3,1.613766,A,INFO,0.031,0.151
4,3.14493,Client,INFO,0.0,0.0
5,4.465991,Client,INFO,0.0,0.0
6,4.541813,B,INFO,0.046,0.792
7,4.675912,A,INFO,0.926,0.696
8,5.766717,Client,INFO,0.0,0.0
9,5.777693,C,INFO,0.868,0.394


In [58]:
# Renumber the rows 0..n-1. `drop=True` discards the old index directly instead
# of first materialising it as a column and deleting it afterwards, so this
# step no longer depends on that temporary column being called "index" (it is
# not when the index is named or when a column called "index" already exists).
df = df.reset_index(drop=True)
df.head(10)

Unnamed: 0,Time,Server,Message_type,CPU Usage,Memory Usage
0,0.464448,Client,INFO,0.0,0.0
1,0.759124,Client,INFO,0.0,0.0
2,1.30257,A,INFO,0.026,0.896
3,1.613766,A,INFO,0.031,0.151
4,3.14493,Client,INFO,0.0,0.0
5,4.465991,Client,INFO,0.0,0.0
6,4.541813,B,INFO,0.046,0.792
7,4.675912,A,INFO,0.926,0.696
8,5.766717,Client,INFO,0.0,0.0
9,5.777693,C,INFO,0.868,0.394


In [59]:
# Rescale Time by a factor of 1/60 (seconds -> minutes) and store the
# whole-minute bucket of each row in a new integer column, Time_floor.
# Note: this overwrites Time, so re-running the cell divides by 60 again.
time_minutes = df["Time"] / 60
df["Time"] = time_minutes
df["Time_floor"] = np.floor(time_minutes).astype("int")
df.head(10)

Unnamed: 0,Time,Server,Message_type,CPU Usage,Memory Usage,Time_floor
0,0.007741,Client,INFO,0.0,0.0,0
1,0.012652,Client,INFO,0.0,0.0,0
2,0.02171,A,INFO,0.026,0.896,0
3,0.026896,A,INFO,0.031,0.151,0
4,0.052415,Client,INFO,0.0,0.0,0
5,0.074433,Client,INFO,0.0,0.0,0
6,0.075697,B,INFO,0.046,0.792,0
7,0.077932,A,INFO,0.926,0.696,0
8,0.096112,Client,INFO,0.0,0.0,0
9,0.096295,C,INFO,0.868,0.394,0


In [60]:
# Keep only the rows whose Message_type is exactly "INFO".
is_info = df["Message_type"].eq("INFO")
df_info = df[is_info]
df_info

Unnamed: 0,Time,Server,Message_type,CPU Usage,Memory Usage,Time_floor
0,0.007741,Client,INFO,0.000,0.000,0
1,0.012652,Client,INFO,0.000,0.000,0
2,0.021710,A,INFO,0.026,0.896,0
3,0.026896,A,INFO,0.031,0.151,0
4,0.052415,Client,INFO,0.000,0.000,0
...,...,...,...,...,...,...
59995,822.402042,A,INFO,0.173,0.184,822
59996,822.429578,A,INFO,0.834,0.711,822
59997,822.523230,A,INFO,0.179,0.111,822
59998,822.609096,C,INFO,0.371,0.412,822


In [61]:
# Average CPU and memory usage per (Server, Time_floor) bucket.
# The metric columns are selected explicitly before aggregating: df_info still
# contains the string column Message_type, and since pandas 2.0 `.mean()` no
# longer silently skips non-numeric columns (it raises TypeError instead).
# Selecting the metrics also means the Time column is never averaged, so it
# does not have to be dropped afterwards. With as_index=False the grouping keys
# come back as regular columns: Server, Time_floor, CPU Usage, Memory Usage.
usage_columns = ["CPU Usage", "Memory Usage"]
df_grouped = (
    df_info
    .groupby(["Server", "Time_floor"], as_index=False)[usage_columns]
    .mean()
)
df_grouped.head(10)

Unnamed: 0,Server,Time_floor,CPU Usage,Memory Usage
0,A,0,0.454487,0.493897
1,A,1,0.486537,0.424683
2,A,2,0.537163,0.434209
3,A,3,0.520069,0.497034
4,A,4,0.458788,0.495404
5,A,5,0.498604,0.505979
6,A,6,0.451667,0.5474
7,A,7,0.5226,0.450167
8,A,8,0.558875,0.517062
9,A,9,0.604913,0.454217


In [62]:
# Reshape wide -> long: one row per (Server, Time_floor, metric), with the
# metric name in the default "variable" column and its number in "Value".
df_melt = df_grouped.melt(
    id_vars=["Server", "Time_floor"],
    value_vars=["CPU Usage", "Memory Usage"],
    value_name="Value",
)

In [63]:
# Order the long-format rows by server first, then by minute bucket.
sort_keys = ["Server", "Time_floor"]
df_melt = df_melt.sort_values(sort_keys)
df_melt.head(10)

Unnamed: 0,Server,Time_floor,variable,Value
0,A,0,CPU Usage,0.454487
3292,A,0,Memory Usage,0.493897
1,A,1,CPU Usage,0.486537
3293,A,1,Memory Usage,0.424683
2,A,2,CPU Usage,0.537163
3294,A,2,Memory Usage,0.434209
3,A,3,CPU Usage,0.520069
3295,A,3,Memory Usage,0.497034
4,A,4,CPU Usage,0.458788
3296,A,4,Memory Usage,0.495404


In [64]:
# Write the long-format result to disk as both CSV and Excel.
# The DataFrame index is written as the first column in both files
# (pandas default, index=True).
csv_path = "Manual_Log_Filtered_New.csv"
xlsx_path = "Manual_Log_Filtered_New.xlsx"
df_melt.to_csv(csv_path)
df_melt.to_excel(xlsx_path)