In [39]:
import pandas as pd
import numpy as np
import openpyxl

In [40]:
df = pd.read_csv("Manual_Log.csv", sep = ";", error_bad_lines=False)
df.head(10)

Unnamed: 0,Time,Server,Message_type,CPU Usage,Memory Usage,Transaction_ID,To_Server,Message
0,0.464447623,Client,INFO,0.0,0.0,1,A,Request from customer Transaction ID: 1
1,0.75912351,Client,INFO,0.0,0.0,2,A,Request from customer Transaction ID: 2
2,1.302.570.232,A,INFO,0.026,0.896,1,B,Send Authentication request to server B Transa...
3,1.613.766.033,A,INFO,0.031,0.151,2,B,Send Authentication request to server B Transa...
4,3.144.929.724,Client,INFO,0.0,0.0,3,A,Request from customer Transaction ID: 3
5,4.465.991.302,Client,INFO,0.0,0.0,4,A,Request from customer Transaction ID: 4
6,4.541.813.003,B,INFO,0.046,0.792,1,A,Return Authentication request to server A Tran...
7,4.675.912.386,A,INFO,0.926,0.696,1,C,Request Balance to server C Transaction ID: 1 ...
8,5.766.717.357,Client,Esketit,0.0,0.0,5,A,Request from customer Transaction ID: 5
9,5.777.693.384,C,Esketit,0.868,0.394,1,A,Return Balance to server A Transaction ID: 1 C...


In [41]:
# Split dataframe into three dataframes based on Server
df = df.drop(["Transaction_ID","To_Server", "Message"], axis=1)
df.head(10)

Unnamed: 0,Time,Server,Message_type,CPU Usage,Memory Usage
0,0.464447623,Client,INFO,0.0,0.0
1,0.75912351,Client,INFO,0.0,0.0
2,1.302.570.232,A,INFO,0.026,0.896
3,1.613.766.033,A,INFO,0.031,0.151
4,3.144.929.724,Client,INFO,0.0,0.0
5,4.465.991.302,Client,INFO,0.0,0.0
6,4.541.813.003,B,INFO,0.046,0.792
7,4.675.912.386,A,INFO,0.926,0.696
8,5.766.717.357,Client,Esketit,0.0,0.0
9,5.777.693.384,C,Esketit,0.868,0.394


In [42]:
# Reset index
df = df.reset_index()
df = df.drop(["index"], axis=1)
df.head(10)

Unnamed: 0,Time,Server,Message_type,CPU Usage,Memory Usage
0,0.464447623,Client,INFO,0.0,0.0
1,0.75912351,Client,INFO,0.0,0.0
2,1.302.570.232,A,INFO,0.026,0.896
3,1.613.766.033,A,INFO,0.031,0.151
4,3.144.929.724,Client,INFO,0.0,0.0
5,4.465.991.302,Client,INFO,0.0,0.0
6,4.541.813.003,B,INFO,0.046,0.792
7,4.675.912.386,A,INFO,0.926,0.696
8,5.766.717.357,Client,Esketit,0.0,0.0
9,5.777.693.384,C,Esketit,0.868,0.394


In [43]:
# Convert Time from seconds to minutes and name the new time Time_floor
df["Time"]=df["Time"].div(60)
df["Time_floor"] = np.floor(df["Time"]).astype("int")
df.head(10)

TypeError: unsupported operand type(s) for /: 'str' and 'int'

In [45]:
# Only select rows with Message_type equal to INFO
df_info = df.loc[df["Message_type"] == "INFO"]
df_info

Unnamed: 0,Time,Server,Message_type,CPU Usage,Memory Usage
0,0.464447623,Client,INFO,0.000,0.000
1,0.75912351,Client,INFO,0.000,0.000
2,1.302.570.232,A,INFO,0.026,0.896
3,1.613.766.033,A,INFO,0.031,0.151
4,3.144.929.724,Client,INFO,0.000,0.000
...,...,...,...,...,...
59995,493.441.225,A,INFO,0.173,0.184
59996,493.457.747,A,INFO,0.834,0.711
59997,4.935.139.377,A,INFO,0.179,0.111
59998,4.935.654.579,C,INFO,0.371,0.412


In [34]:
# Group rows by Server and Time_floor; drop obsolete Time column
df_grouped = df_info.groupby(["Server","Time_floor"], as_index=False).mean()
df_grouped = df_grouped.drop(["Time"], axis=1)
df_grouped.head(10)

Unnamed: 0,Server,Time_floor,CPU Usage,Memory Usage
0,A,0,0.454487,0.493897
1,A,1,0.486537,0.424683
2,A,2,0.537163,0.434209
3,A,3,0.520069,0.497034
4,A,4,0.458788,0.495404
...,...,...,...,...
3287,Client,818,0.000000,0.000000
3288,Client,819,0.000000,0.000000
3289,Client,820,0.000000,0.000000
3290,Client,821,0.000000,0.000000


In [12]:
# Unpivot dataframe from wide format to long format
df_melt = pd.melt(df_grouped, id_vars = ["Server", "Time_floor"], 
                  value_vars = ["CPU Usage","Memory Usage"], 
                  value_name = "Value")

In [13]:
# Sort dataframe by Server and Time_floor
df_melt = df_melt.sort_values(by=["Server", "Time_floor"])
df_melt.head(10)

In [16]:
# Save dataframe to new files
df_melt.to_csv("Manual_Log_Filtered_New.csv")
df_melt.to_excel("Manual_Log_Filtered_New.xlsx")