In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pymongo
import seaborn
import seaborn as sns
from pymongo import MongoClient
%config InlineBackend.figure_formats=['pdf']# render pdf figs for LaTeX

In [12]:
from typing import List
def optimize_floats(df: pd.DataFrame) -> pd.DataFrame:
    """Downcast every ``float64`` column of ``df`` with ``pd.to_numeric(..., downcast='float')``.

    The frame is modified in place and the same object is returned so calls
    can be nested.
    """
    float_columns = list(df.select_dtypes(include=['float64']).columns)
    shrunk = df[float_columns].apply(lambda column: pd.to_numeric(column, downcast='float'))
    df[float_columns] = shrunk
    return df


def optimize_ints(df: pd.DataFrame) -> pd.DataFrame:
    """Downcast every ``int64`` column of ``df`` with ``pd.to_numeric(..., downcast='integer')``.

    Works in place on ``df`` and hands the same frame back to the caller.
    """
    int_columns = df.select_dtypes(include=['int64']).columns.tolist()

    def _shrink(column: pd.Series) -> pd.Series:
        # One column at a time, exactly as DataFrame.apply feeds them in.
        return pd.to_numeric(column, downcast='integer')

    df[int_columns] = df[int_columns].apply(_shrink)
    return df


def optimize_objects(df: pd.DataFrame, datetime_features: List[str]) -> pd.DataFrame:
    """Convert the object columns of ``df`` to more compact dtypes, in place.

    Args:
        df: Frame to convert; it is modified and returned.
        datetime_features: Names of object columns to parse with
            ``pd.to_datetime`` instead of treating them as categories.

    Returns:
        The same ``df`` object. Object columns listed in ``datetime_features``
        become datetimes; every other object column becomes ``category`` when
        fewer than half of its values are distinct, and is left as is otherwise.
    """
    num_total_values = len(df)
    for col in df.select_dtypes(include=['object']):
        if col in datetime_features:
            df[col] = pd.to_datetime(df[col])
            continue
        if num_total_values == 0:
            # A zero-row frame has no unique/total ratio to compute; leave the
            # column untouched instead of dividing by zero.
            continue
        num_unique_values = len(df[col].unique())
        if num_unique_values / num_total_values < 0.5:
            df[col] = df[col].astype('category')
    return df

def optimize(df: pd.DataFrame, datetime_features: List[str] = []):
    """Run the three dtype optimizers on ``df`` and return the result.

    Object columns are handled first (``datetime_features`` names the ones to
    parse as datetimes), then integer columns, then float columns.
    """
    # The shared default list is only passed on for membership tests here.
    df = optimize_objects(df, datetime_features)
    df = optimize_ints(df)
    return optimize_floats(df)

# Load Moodle data

In [13]:
# Load the request samples of every "final" Moodle experiment from MongoDB.
# Produces df_moodle (one row per sample) and df_migr_moodle (one row per
# migration record, or None when no "<collection>_migr" collection holds data).
optimize_df = True  # shrink each frame with optimize() before it is combined
client = MongoClient("manager-mongo-manager")

db_names = client.list_database_names()
moodle_frames = []       # sample frames, one per result collection
moodle_migr_frames = []  # migration frames, one per collection that has records
for db_name in db_names:
    if "final" not in db_name:
        continue
    if "moodle" not in db_name:
        continue
    print("Database: " + db_name)
    db = client[db_name]
    coll_names = db.list_collection_names(nameOnly=True)

    for coll_name in coll_names:
        print(coll_name)
        if "migr" in coll_name:
            # Migration collections are read together with their result collection below.
            continue
        data = pd.DataFrame(list(db[coll_name].find()))
        data["database"] = db_name
        data["collection"] = coll_name
        # presumably timeStamp is stored as epoch milliseconds; scaled here to the
        # nanoseconds pd.to_datetime assumes for plain numbers - TODO confirm
        data["timeStamp"] = pd.to_datetime(data["timeStamp"]*1000000)
        first_sample = data["timeStamp"].min()
        # Offset from the first sample, divided by 1000 and read back in millisecond
        # units. NOTE(review): the numeric comparisons made on this column later
        # (e.g. `< 300`) rely on astype("timedelta64[ms]") yielding floats, which is
        # the pre-2.0 pandas behaviour - confirm before upgrading pandas.
        data["relative_time"] = ((data["timeStamp"] - first_sample)/1000).astype("timedelta64[ms]")

        # Collection names are split on "-": field 2 -> fdw, 3 -> type, 4 -> tp,
        # optional field 5 -> duration.
        ar = coll_name.split("-")
        duration = ar[5] if len(ar) > 5 else None
        data["tp"] = int(ar[4])
        data["type"] = ar[3]
        data["fdw"] = ar[2]
        data["duration"] = duration

        data2 = pd.DataFrame(list(db[coll_name+"_migr"].find()))
        if not data2.empty:
            # The earlier `data2["tp"] = data["tp"].unique()` assignment was dropped:
            # it raised as soon as there was more than one migration record and was
            # overwritten by this scalar anyway.
            data2["tp"] = int(ar[4])
            data2["database"] = db_name
            data2["collection"] = coll_name
            data2["type"] = ar[3]
            data2["duration"] = duration
            # presumably start/end are epoch seconds, scaled to nanoseconds - TODO confirm
            data2["timeStamp_start"] = pd.to_datetime(data2["start"]*1000000000)
            data2["timeStamp_end"] = pd.to_datetime(data2["end"]*1000000000)
            data2["relative_time_start"] = ((data2["timeStamp_start"] - first_sample)/1000).astype("timedelta64[ms]")
            data2["relative_time_end"] = ((data2["timeStamp_end"] - first_sample)/1000).astype("timedelta64[ms]")
            moodle_migr_frames.append(data2)
            # Every sample of the run carries the bounds of the migration record labelled 0.
            data["migration_start"] = data2["relative_time_start"][0]
            data["migration_end"] = data2["relative_time_end"][0]

        moodle_frames.append(optimize(data) if optimize_df else data)

if not moodle_frames:
    raise RuntimeError("No 'final' Moodle result collection was found in MongoDB")

# Concatenate once instead of growing the frames inside the loop.
df_moodle = pd.concat(moodle_frames)
df_migr_moodle = pd.concat(moodle_migr_frames) if moodle_migr_frames else None

# Runs without a migration have NaN bounds, so both flags are False for them.
df_moodle["before_migration"] = df_moodle["relative_time"] < df_moodle["migration_start"]
df_moodle["after_migration"] = df_moodle["relative_time"] > df_moodle["migration_end"]

Database: final0406-moodle-fdw-1M-moodle-fdw-cbwbv
final0406-moodle-fdw-1M-30-120
final0406-moodle-fdw-1M-30-180_migr
final0406-moodle-fdw-1M-30-60
final0406-moodle-fdw-1M-30-240
final0406-moodle-fdw-1M-30-300_migr
final0406-moodle-fdw-1M-30-120_migr
final0406-moodle-fdw-1M-30-240_migr
final0406-moodle-fdw-1M-30-300
final0406-moodle-fdw-1M-30-180
final0406-moodle-fdw-1M-30-60_migr
Database: final0406-moodle-fdw-1M-moodle-fdw-dtkt7
final0406-moodle-fdw-1M-30-240_migr
final0406-moodle-fdw-1M-30-300
final0406-moodle-fdw-1M-30-180_migr
final0406-moodle-fdw-1M-30-120_migr
final0406-moodle-fdw-1M-30-240
final0406-moodle-fdw-1M-30-120
final0406-moodle-fdw-1M-30-180
final0406-moodle-fdw-1M-30-60_migr
final0406-moodle-fdw-1M-30-60
final0406-moodle-fdw-1M-30-300_migr
Database: final0406-moodle-fdw-1M-moodle-fdw-txdqh
final0406-moodle-fdw-1M-30-120
final0406-moodle-fdw-1M-30-300_migr
final0406-moodle-fdw-1M-30-60
final0406-moodle-fdw-1M-30-240
final0406-moodle-fdw-1M-30-60_migr
final0406-moodle-f

In [14]:
# Display the combined Moodle frame (the output below is pandas' truncated preview).
df_moodle

Unnamed: 0,_id,timeStamp,elapsed,label,responseCode,responseMessage,threadName,dataType,success,failureMessage,...,collection,relative_time,tp,type,fdw,duration,migration_start,migration_end,before_migration,after_migration
0,5ed9d8304c6ad69816254ed4,2020-06-05 05:25:14.652,85,Frontpage not logged,200,OK,Moodle Test 2-2,text,True,,...,final0406-moodle-fdw-1M-30-120,0.0,30,1M,fdw,120,162.0,173.0,True,False
1,5ed9d8304c6ad69816254ed5,2020-06-05 05:25:14.652,32,Frontpage not logged-0,303,See Other,Moodle Test 2-2,text,True,,...,final0406-moodle-fdw-1M-30-120,0.0,30,1M,fdw,120,162.0,173.0,True,False
2,5ed9d8304c6ad69816254ed6,2020-06-05 05:25:14.684,53,Frontpage not logged-1,200,OK,Moodle Test 2-2,text,True,,...,final0406-moodle-fdw-1M-30-120,0.0,30,1M,fdw,120,162.0,173.0,True,False
3,5ed9d8304c6ad69816254ed7,2020-06-05 05:25:14.658,90,Frontpage not logged,200,OK,Moodle Test 2-3,text,True,,...,final0406-moodle-fdw-1M-30-120,0.0,30,1M,fdw,120,162.0,173.0,True,False
4,5ed9d8304c6ad69816254ed8,2020-06-05 05:25:14.658,31,Frontpage not logged-0,303,See Other,Moodle Test 2-3,text,True,,...,final0406-moodle-fdw-1M-30-120,0.0,30,1M,fdw,120,162.0,173.0,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9998,5ed746365a5f7dc541a303fa,2020-06-03 06:41:39.941,112,View course,200,OK,Moodle Test 2-7,text,True,,...,final2805-moodle-nofdw-2O-10,300.0,10,2O,nofdw,,,,False,False
9999,5ed746365a5f7dc541a303fb,2020-06-03 06:41:39.941,562,Login,200,OK,Moodle Test 2-19,text,True,,...,final2805-moodle-nofdw-2O-10,300.0,10,2O,nofdw,,,,False,False
10000,5ed746365a5f7dc541a303fc,2020-06-03 06:41:39.941,324,Login-0,303,See Other,Moodle Test 2-19,text,True,,...,final2805-moodle-nofdw-2O-10,300.0,10,2O,nofdw,,,,False,False
10001,5ed746365a5f7dc541a303fd,2020-06-03 06:41:40.265,27,Login-1,303,See Other,Moodle Test 2-19,text,True,,...,final2805-moodle-nofdw-2O-10,300.0,10,2O,nofdw,,,,False,False


In [15]:
# Count the distinct result collections loaded for each (type, tp, fdw) combination.
df_moodle.groupby(["type","tp", "fdw"])["collection"].nunique()


type  tp  fdw  
1M    30  fdw      10
1O    10  fdw       2
          nofdw     2
      15  fdw       2
          nofdw     2
      20  fdw       2
          nofdw     2
      25  fdw       2
          nofdw     2
      30  fdw       2
          nofdw     2
1O1M  10  fdw       2
      15  fdw       2
      20  fdw       2
      25  fdw       2
      30  fdw       2
2O    10  nofdw     2
      15  nofdw     2
      20  nofdw     2
      25  nofdw     2
      30  nofdw     2
Name: collection, dtype: int64

In [16]:
# Print the row count and the dtype of every column of the combined Moodle frame.
df_moodle.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2311564 entries, 0 to 10002
Data columns (total 36 columns):
 #   Column            Dtype         
---  ------            -----         
 0   _id               object        
 1   timeStamp         datetime64[ns]
 2   elapsed           int16         
 3   label             object        
 4   responseCode      int16         
 5   responseMessage   object        
 6   threadName        category      
 7   dataType          category      
 8   success           bool          
 9   failureMessage    object        
 10  bytes             int32         
 11  sentBytes         int16         
 12  grpThreads        int8          
 13  allThreads        int8          
 14  URL               object        
 15  Latency           int16         
 16  IdleTime          int8          
 17  Connect           int16         
 18  course            object        
 19  tenant            int8          
 20  order_course      int8          
 21  size      

# Load Camunda data

In [17]:
# Load the request samples of every "final" Camunda experiment from MongoDB.
# Produces df_camunda (samples), df_migr_camunda (migration records, or None) and,
# when checkProcess is enabled, df_process (rows of "<collection>-process", else None).
checkProcess = False
client = MongoClient("manager-mongo-manager")
optimize_df = True  # shrink each frame with optimize() before it is combined
db_names = client.list_database_names()
camunda_frames = []       # sample frames, one per result collection
camunda_migr_frames = []  # migration frames, one per collection that has records
process_frames = []       # process frames, only filled when checkProcess is True
for db_name in db_names:
    if "camunda" not in db_name:
        continue
    if "final" not in db_name:
        continue
    print("Database: " + db_name)
    db = client[db_name]
    coll_names = db.list_collection_names(nameOnly=True)

    for coll_name in coll_names:
        # Companion collections are read together with their result collection below.
        if "process" in coll_name:
            continue
        if "migr" in coll_name:
            continue

        print(coll_name + " - ", end="")

        # Collection names are split on "-": field 2 -> fdw, 3 -> type, 4 -> tp,
        # field 5 -> nb_clients for the samples / duration for the migration records.
        ar = coll_name.split("-")
        if ar[3] != "1M" and int(ar[4]) >= 200:
            # Runs with tp >= 200 are only kept for the "1M" type.
            print("pass")
            continue

        print("OK")

        data = pd.DataFrame(list(db[coll_name].find()))
        data["database"] = db_name
        data["collection"] = coll_name
        # presumably timeStamp is stored as epoch milliseconds; scaled here to the
        # nanoseconds pd.to_datetime assumes for plain numbers - TODO confirm
        data["timeStamp"] = pd.to_datetime(data["timeStamp"]*1000000)
        first_sample = data["timeStamp"].min()
        # Offset from the first sample, divided by 1000 and read back in millisecond
        # units. NOTE(review): the numeric comparisons made on this column later
        # (e.g. `< 300`) rely on astype("timedelta64[ms]") yielding floats, which is
        # the pre-2.0 pandas behaviour - confirm before upgrading pandas.
        data["relative_time"] = ((data["timeStamp"] - first_sample)/1000).astype("timedelta64[ms]")

        data["tp"] = int(ar[4])
        data["type"] = ar[3]
        data["fdw"] = ar[2]
        data["nb_clients"] = int(ar[5])

        data2 = pd.DataFrame(list(db[coll_name+"_migr"].find()))
        if not data2.empty:
            data2["tp"] = int(ar[4])
            data2["database"] = db_name
            data2["collection"] = coll_name
            data2["fdw"] = ar[2]
            data2["type"] = ar[3]
            # Stored as the raw text field: the former int(ar[5]) assignment was
            # always overwritten by this value, and the `len(ar) > 5` fallback could
            # never run because int(ar[5]) above already requires the field.
            data2["duration"] = ar[5]
            # presumably start/end are epoch seconds, scaled to nanoseconds - TODO confirm
            data2["timeStamp_start"] = pd.to_datetime(data2["start"]*1000000000)
            data2["timeStamp_end"] = pd.to_datetime(data2["end"]*1000000000)
            data2["relative_time_start"] = ((data2["timeStamp_start"] - first_sample)/1000).astype("timedelta64[ms]")
            data2["relative_time_end"] = ((data2["timeStamp_end"] - first_sample)/1000).astype("timedelta64[ms]")
            camunda_migr_frames.append(data2)

        if checkProcess:
            data3 = pd.DataFrame(list(db[coll_name+"-process"].find()))
            data3["database"] = db_name
            data3["collection"] = coll_name
            for stamp_column in ("p_start", "p_end", "t_start", "t_end"):
                data3[stamp_column] = pd.to_datetime(data3[stamp_column]*1000000)
            data3["p_duration"] = data3["p_duration"].astype(float)
            data3["relative_time"] = ((data3["p_start"] - data3["p_start"].min())/1000).astype("timedelta64[ms]")
            # NOTE(review): tp is read from the LAST name field here, while the sample
            # and migration frames use field 4 - confirm which one is intended.
            data3["tp"] = int(ar[-1])
            if not data2.empty:
                data3["migration_start"] = data2["relative_time_start"][0]
                data3["migration_end"] = data2["relative_time_end"][0]
            data3 = data3[data3["business_key"]=="test"]
            process_frames.append(data3)

        if not data2.empty:
            # Every sample of the run carries the bounds of the migration record labelled 0.
            data["migration_start"] = data2["relative_time_start"][0]
            data["migration_end"] = data2["relative_time_end"][0]

        camunda_frames.append(optimize(data) if optimize_df else data)

if not camunda_frames:
    raise RuntimeError("No 'final' Camunda result collection was found in MongoDB")

# Concatenate once instead of growing the frames inside the loop.
df_camunda = pd.concat(camunda_frames)
df_migr_camunda = pd.concat(camunda_migr_frames) if camunda_migr_frames else None
df_process = pd.concat(process_frames) if process_frames else None

# Runs without a migration have NaN bounds, so both flags are False for them.
df_camunda["before_migration"] = df_camunda["relative_time"] < df_camunda["migration_start"]
df_camunda["after_migration"] = df_camunda["relative_time"] > df_camunda["migration_end"]

Database: final0506-camunda-fdw-1M-camunda-fdw-9nf7t
final0506-camunda-fdw-1M-200-120 - OK
final0506-camunda-fdw-1M-200-180 - OK
final0506-camunda-fdw-1M-200-300 - OK
final0506-camunda-fdw-1M-200-60 - OK
final0506-camunda-fdw-1M-200-240 - OK
Database: final0506-camunda-fdw-1M-camunda-fdw-k6p6g
final0506-camunda-fdw-1M-200-60 - OK
final0506-camunda-fdw-1M-200-300 - OK
final0506-camunda-fdw-1M-200-240 - OK
final0506-camunda-fdw-1M-200-120 - OK
final0506-camunda-fdw-1M-200-180 - OK
Database: final0506-camunda-fdw-1M-camunda-fdw-w27kv
final0506-camunda-fdw-1M-200-240 - OK
final0506-camunda-fdw-1M-200-120 - OK
final0506-camunda-fdw-1M-200-300 - OK
final0506-camunda-fdw-1M-200-60 - OK
final0506-camunda-fdw-1M-200-180 - OK
Database: final0506-camunda-fdw-1M-camunda-fdw-wnzsf
final0506-camunda-fdw-1M-200-300 - OK
final0506-camunda-fdw-1M-200-180 - OK
final0506-camunda-fdw-1M-200-60 - OK
final0506-camunda-fdw-1M-200-120 - OK
final0506-camunda-fdw-1M-200-240 - OK
Database: final0506-camunda-fdw-

In [18]:
# Drop the samples labelled "Test" or "JSR223 Sampler", then count the distinct
# result collections left for each (type, tp, fdw, nb_clients) combination.
is_test_label = df_camunda.label == "Test"
is_sampler_label = df_camunda.label == "JSR223 Sampler"
df_camunda = df_camunda[~(is_test_label | is_sampler_label)]

df_camunda.groupby(["type", "tp", "fdw", "nb_clients"])["collection"].nunique()

type  tp   fdw    nb_clients
1M    200  fdw    60            2
                  120           2
                  180           2
                  240           2
                  300           2
1O    50   fdw    100           2
           nofdw  100           2
      100  fdw    100           2
           nofdw  100           2
      150  fdw    100           2
           nofdw  100           2
1O1M  50   fdw    100           2
      100  fdw    100           2
      150  fdw    100           2
2O    50   nofdw  100           2
      100  nofdw  100           2
      150  nofdw  100           2
Name: collection, dtype: int64

In [19]:
# List the distinct tp values present in the Camunda samples.
df_camunda["tp"].unique()

array([200,  50, 150, 100], dtype=int16)

In [20]:
def _with_real_tp(df):
    """Return a copy of ``df`` with ``tenant_grp`` and ``real_tp`` columns.

    ``tenant_grp`` is the number of distinct ``tenant`` values among the rows
    sharing the same ``type``; ``real_tp`` is ``tp`` multiplied by that count.
    Both columns are overwritten when they already exist, so the cell can be
    re-run safely (the previous join with ``rsuffix="_grp"`` collided with the
    ``tenant_grp`` column it had added on the first run).
    """
    tenants_per_type = df.groupby("type")["tenant"].transform("nunique")
    return df.assign(tenant_grp=tenants_per_type, real_tp=df["tp"] * tenants_per_type)


df_moodle = _with_real_tp(df_moodle)
df_camunda = _with_real_tp(df_camunda)


In [21]:
# Shared plot styling: preview the reversed-gray palette and make it the default.
pal = "gray_r"
gray_palette = sns.color_palette(pal)
sns.palplot(gray_palette)
sns.set_palette(gray_palette)
# Passed as `whis` to every boxplot below; matplotlib reads a pair of numbers as
# the lower/upper percentiles at which the whiskers are drawn.
whis = [10, 90]

<Figure size 432x72 with 1 Axes>

In [22]:
# Summary statistics of the "1O1M" Moodle samples with relative_time < 600,
# split by real_tp and by whether the sample precedes the migration.
moodle_split_window = df_moodle[(df_moodle.type == "1O1M") & (df_moodle["relative_time"] < 600)]
moodle_split_window.groupby(["real_tp", "before_migration"]).describe(percentiles=[0.01, 0.5, 0.99])

Unnamed: 0_level_0,Unnamed: 1_level_0,elapsed,elapsed,elapsed,elapsed,elapsed,elapsed,elapsed,elapsed,responseCode,responseCode,...,migration_end,migration_end,tenant_grp,tenant_grp,tenant_grp,tenant_grp,tenant_grp,tenant_grp,tenant_grp,tenant_grp
Unnamed: 0_level_1,Unnamed: 1_level_1,count,mean,std,min,1%,50%,99%,max,count,mean,...,99%,max,count,mean,std,min,1%,50%,99%,max
real_tp,before_migration,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
20,False,38307.0,167.926802,158.061829,17.0,23.0,132.0,677.94,873.0,38307.0,230.719503,...,370.0,370.0,38307.0,2.0,0.0,2.0,2.0,2.0,2.0,2.0
20,True,54516.0,167.222559,155.961335,19.0,22.0,129.0,690.0,1339.0,54516.0,229.914135,...,370.0,370.0,54516.0,2.0,0.0,2.0,2.0,2.0,2.0,2.0
30,False,59724.0,177.018167,189.620308,17.0,22.0,129.0,922.77,1215.0,59724.0,230.614661,...,359.0,359.0,59724.0,2.0,0.0,2.0,2.0,2.0,2.0,2.0
30,True,81921.0,181.596697,174.41343,19.0,22.0,134.0,772.0,1655.0,81921.0,229.815823,...,359.0,359.0,81921.0,2.0,0.0,2.0,2.0,2.0,2.0,2.0
40,False,80319.0,193.553593,242.179521,16.0,22.0,130.0,1270.82,2635.0,80319.0,230.716107,...,357.0,357.0,80319.0,2.0,0.0,2.0,2.0,2.0,2.0,2.0
40,True,109752.0,353.785635,625.701666,19.0,23.0,167.0,3156.0,12090.0,109752.0,229.965431,...,357.0,357.0,109752.0,2.0,0.0,2.0,2.0,2.0,2.0,2.0
50,False,100162.0,217.059593,301.321338,16.0,22.0,137.0,1646.0,4132.0,100162.0,230.74082,...,359.0,359.0,100162.0,2.0,0.0,2.0,2.0,2.0,2.0,2.0
50,True,141299.0,1425.49633,1900.791509,5.0,22.0,816.0,9819.02,17841.0,141299.0,243.651349,...,359.0,359.0,141299.0,2.0,0.0,2.0,2.0,2.0,2.0,2.0
60,False,120265.0,296.452309,630.636245,16.0,23.0,151.0,2477.0,19718.0,120265.0,231.081429,...,360.0,360.0,120265.0,2.0,0.0,2.0,2.0,2.0,2.0,2.0
60,True,173693.0,1383.996845,1863.018991,4.0,15.0,741.0,9550.16,16609.0,173693.0,259.132567,...,360.0,360.0,173693.0,2.0,0.0,2.0,2.0,2.0,2.0,2.0


In [23]:
# Response time over relative_time for the "1O1M" Moodle samples
# (relative_time < 600), one line per real_tp value.
moodle_split_window = df_moodle[(df_moodle.type == "1O1M") & (df_moodle["relative_time"] < 600)]
sns.lineplot(x="relative_time", y="elapsed", hue="real_tp", data=moodle_split_window)

<matplotlib.axes._subplots.AxesSubplot at 0x7f68de740250>

<Figure size 432x288 with 1 Axes>

# Split vs vanilla

In [24]:

# Summary statistics, per real_tp and fdw, of the Moodle samples with
# relative_time < 300 taken from the "2O" runs and from the "1O1M" runs
# before their migration.
in_first_300 = df_moodle["relative_time"] < 300
is_2o = df_moodle.type == "2O"
is_1o1m_before_migration = (df_moodle.type == "1O1M") & (df_moodle["before_migration"] == True)
df_moodle[in_first_300 & (is_2o | is_1o1m_before_migration)].groupby(["real_tp", "fdw"]).describe(percentiles=[.01, .5, .99])

Unnamed: 0_level_0,Unnamed: 1_level_0,elapsed,elapsed,elapsed,elapsed,elapsed,elapsed,elapsed,elapsed,responseCode,responseCode,...,migration_end,migration_end,tenant_grp,tenant_grp,tenant_grp,tenant_grp,tenant_grp,tenant_grp,tenant_grp,tenant_grp
Unnamed: 0_level_1,Unnamed: 1_level_1,count,mean,std,min,1%,50%,99%,max,count,mean,...,99%,max,count,mean,std,min,1%,50%,99%,max
real_tp,fdw,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
20,fdw,50179.0,166.524044,156.083244,19.0,22.0,129.0,694.0,1339.0,50179.0,229.890711,...,370.0,370.0,50179.0,2.0,0.0,2.0,2.0,2.0,2.0,2.0
20,nofdw,50168.0,145.863479,146.075885,19.0,22.0,107.0,660.33,1271.0,50168.0,229.921902,...,,,50168.0,2.0,0.0,2.0,2.0,2.0,2.0,2.0
30,fdw,76069.0,181.828616,174.56327,19.0,22.0,134.0,779.0,1655.0,76069.0,229.856446,...,359.0,359.0,76069.0,2.0,0.0,2.0,2.0,2.0,2.0,2.0
30,nofdw,76186.0,164.380162,175.192894,20.0,22.0,114.0,765.0,2958.0,76186.0,229.899824,...,,,76186.0,2.0,0.0,2.0,2.0,2.0,2.0,2.0
40,fdw,101947.0,366.097953,644.940757,19.0,24.0,172.0,3275.0,12090.0,101947.0,229.945913,...,357.0,357.0,101947.0,2.0,0.0,2.0,2.0,2.0,2.0,2.0
40,nofdw,101720.0,359.390769,684.427255,19.0,23.0,149.0,3334.81,12382.0,101720.0,229.952222,...,,,101720.0,2.0,0.0,2.0,2.0,2.0,2.0,2.0
50,fdw,130984.0,1515.421647,1941.126965,5.0,23.0,918.0,10055.0,17841.0,130984.0,244.551602,...,359.0,359.0,130984.0,2.0,0.0,2.0,2.0,2.0,2.0,2.0
50,nofdw,130761.0,1483.412654,1956.000114,5.0,22.0,871.0,10129.2,18175.0,130761.0,245.256238,...,,,130761.0,2.0,0.0,2.0,2.0,2.0,2.0,2.0
60,fdw,161011.0,1470.764892,1903.335965,4.0,14.0,808.0,9765.9,16609.0,161011.0,261.144562,...,360.0,360.0,161011.0,2.0,0.0,2.0,2.0,2.0,2.0,2.0
60,nofdw,160561.0,1472.45199,1912.753786,5.0,13.0,801.0,9767.6,18000.0,160561.0,262.353517,...,,,160561.0,2.0,0.0,2.0,2.0,2.0,2.0,2.0


In [38]:
# Figure 1O_2O: response time against input throughput, one box per fdw value.
# Top row uses the Moodle samples ("Iomad" on the axis), bottom row the Camunda
# samples; left column is titled "1 tenant", right column "2 tenants".


def _one_tenant_runs(df):
    """Samples of the "1O" runs with relative_time < 300."""
    return df[(df["relative_time"] < 300) & (df["type"] == "1O")]


def _two_tenant_runs(df):
    """Samples with relative_time < 300 from "2O" runs and from "1O1M" runs before migration.

    The same 300 bound is applied to both applications; the Camunda panel used
    to leave it off its "1O1M" branch, unlike the Moodle panel.
    """
    in_window = df["relative_time"] < 300
    is_2o = df["type"] == "2O"
    is_1o1m_before_migration = (df["type"] == "1O1M") & (df["before_migration"] == True)
    return df[in_window & (is_2o | is_1o1m_before_migration)]


fig, ax_array = plt.subplots(2,2, figsize=(7,4))

# (axes, selector, samples): subsets are built one at a time to limit memory use.
panels = (
    (ax_array[0,0], _one_tenant_runs, df_moodle),
    (ax_array[0,1], _two_tenant_runs, df_moodle),
    (ax_array[1,0], _one_tenant_runs, df_camunda),
    (ax_array[1,1], _two_tenant_runs, df_camunda),
)
for ax, select, samples in panels:
    sns.boxplot(data=select(samples), x="real_tp", y="elapsed", hue="fdw", whis=whis, showfliers=False, ax=ax)
    # Hide the per-panel legend and default labels; the shared ones are set below.
    ax.legend([],[], frameon=False)
    ax.set_ylabel("")
    ax.set_xlabel("")

ax_array[0,0].set(ylim=(0, 500))
# Plain list (as for the other panels) so this cell does not depend on numpy.
ax_array[0,0].set_yticks([0,100,200,300,400,500])
ax_array[0,0].set_ylabel("Iomad\nResponse time (in ms)")
ax_array[1,0].set(ylim=(0, 125))
ax_array[1,0].set_yticks([0,25,50,75,100,125])
ax_array[1,0].set_ylabel("Camunda\nResponse time (in ms)")
ax_array[1,0].set_xlabel("Input query throughput (in RPS)")
ax_array[1,1].set(ylim=(0, 125))
ax_array[1,1].set_yticks([0,25,50,75,100,125])
ax_array[1,1].set_xlabel("Input query throughput (in RPS)")
ax_array[0,0].set_title("1 tenant")
ax_array[0,1].set_title("2 tenants")
ax_array[0,1].set(ylim=(0, 4000))
plt.savefig('figures/1O_2O.pdf', bbox_inches = "tight")

<Figure size 504x288 with 4 Axes>

In [26]:
# Exploratory boxplots restricted to the "1O1M" Moodle runs.
moodle_1o1m = df_moodle[df_moodle.type == "1O1M"]

# Samples before the migration with relative_time < 300, one box per fdw value.
# The previous mask also OR-ed in `(type == "1O1M") & (type == "2O")`, which can
# never be true, so that term is dropped without changing the selected rows.
# NOTE(review): if "2O" runs were meant to be included, the condition must be rewritten.
_, ax = plt.subplots()
sns.boxplot(data=moodle_1o1m[(moodle_1o1m["before_migration"] == True) & (moodle_1o1m.relative_time < 300)], x="real_tp", y="elapsed", hue="fdw", whis=whis, showfliers=False, ax=ax)

# Samples from 20 relative_time units before migration_start up to migration_end,
# boxes split by whether the sample precedes the migration.
_, ax = plt.subplots()
sns.boxplot(data=moodle_1o1m[(moodle_1o1m["after_migration"] == False) & (moodle_1o1m["relative_time"] > moodle_1o1m["migration_start"] - 20)], x="real_tp", y="elapsed", hue="before_migration", whis=whis, showfliers=False, ax=ax)


<matplotlib.axes._subplots.AxesSubplot at 0x7f68f07b61c0>

<Figure size 432x288 with 1 Axes>

<Figure size 432x288 with 1 Axes>

# Performance gain with migration

In [42]:
# Figure 1O1M: response time against input throughput for the "1O1M" runs, one
# box per tenant. Left column is titled "Before migration", right column
# "After migration"; top row uses the Moodle samples, bottom row the Camunda samples.


def _migration_phase(df, after):
    """Rows of the "1O1M" runs whose ``after_migration`` flag equals ``after``.

    Rows must also lie either before the migration with relative_time < 300, or
    after it with relative_time < 600.
    """
    is_1o1m = df.type == "1O1M"
    before_window = (df["before_migration"] == True) & (df["relative_time"] < 300)
    after_window = (df["after_migration"] == True) & (df["relative_time"] < 600)
    return df[(df.after_migration == after) & is_1o1m & (before_window | after_window)]


fig, ax_array = plt.subplots(2,2, figsize=(7,4))

# (axes, samples, value required for after_migration)
panels = (
    (ax_array[0,0], df_moodle, False),
    (ax_array[0,1], df_moodle, True),
    (ax_array[1,0], df_camunda, False),
    (ax_array[1,1], df_camunda, True),
)
for ax, samples, after in panels:
    sns.boxplot(data=_migration_phase(samples, after), x="real_tp", y="elapsed", hue="tenant", whis=whis, showfliers=False, ax=ax)
    # Hide the per-panel legend and default labels; the shared ones are set below.
    ax.legend([],[], frameon=False)
    ax.set_ylabel("")
    ax.set_xlabel("")

top_left, top_right = ax_array[0,0], ax_array[0,1]
bottom_left, bottom_right = ax_array[1,0], ax_array[1,1]

top_left.set(ylim=(0, 4000))
top_left.set_ylabel("Iomad\nResponse time (in ms)")
top_left.set_title("Before migration")

top_right.set(ylim=(0, 1000))
top_right.set_title("After migration")

bottom_left.set(ylim=(0, 125))
bottom_left.set_yticks([0,25,50,75,100,125])
bottom_left.set_ylabel("Camunda\nResponse time (in ms)")
bottom_left.set_xlabel("Input query throughput (in RPS)")

bottom_right.set_xlabel("Input query throughput (in RPS)")
bottom_right.set_yticks([0,25,50,75,100,125])
bottom_right.set(ylim=(0, 125))

plt.savefig('figures/1O1M.pdf', bbox_inches = "tight")

<Figure size 504x288 with 4 Axes>

# Effect of injection duration on migration duration

In [28]:
# Figure 1M: migration duration against the duration of injection for the "1M"
# runs, as a scatter plot with a dashed regression line per application.
one_second = pd.Timedelta(seconds=1)
for migrations in (df_migr_moodle, df_migr_camunda):
    # Length of each migration record in (fractional) seconds.
    migrations["migration_duration"] = (migrations["timeStamp_end"] - migrations["timeStamp_start"]) / one_second
    # Make sure the injection duration is numeric before it is plotted and regressed.
    migrations["duration"] = migrations["duration"].astype(float)

fig, ax_array = plt.subplots(1,2, figsize=(6,1.5))

# (axes, migration records, panel title, upper y limit)
panels = (
    (ax_array[0], df_migr_moodle, "Iomad", 15),
    (ax_array[1], df_migr_camunda, "Camunda", 40),
)
for ax, migrations, title, y_max in panels:
    runs_1m = migrations[migrations.type == "1M"]
    # color="b" replaces c=["B"]: uppercase single-letter colours are deprecated
    # in matplotlib and rejected by newer releases.
    sns.scatterplot(data=runs_1m, x="duration", y="migration_duration", color="b", s=8, ax=ax)
    sns.regplot(data=runs_1m, x="duration", y="migration_duration", scatter=False, line_kws={"linestyle":"--", "linewidth": 1}, ax=ax)
    ax.set_title(title)
    ax.set_xticks([60,120,180,240,300])
    ax.set(xlim=(0, 310))
    ax.set(ylim=(0, y_max))
    ax.set_xlabel("Duration of injection (in seconds)")

ax_array[0].set_ylabel("Migration duration\n(in seconds)")
ax_array[1].set_ylabel(None)
plt.savefig('figures/1M.pdf', bbox_inches = "tight")

  scout = ax.scatter([], [], **kws)


<Figure size 432x108 with 2 Axes>

# Effects on the colocated tenant

In [29]:
# Figure 1O1M_colocated: response time against input throughput for the "1O1M"
# runs around the migration, boxes split by the before_migration flag.


def _around_migration(df):
    """Rows of the "1O1M" runs from 20 units before migration_start up to migration_end.

    Rows flagged ``after_migration`` are excluded and relative_time must stay
    below 600.
    """
    is_1o1m = df.type == "1O1M"
    not_after = df["after_migration"] == False
    from_20_before_start = df["relative_time"] > df["migration_start"] - 20
    below_600 = df["relative_time"] < 600
    return df[not_after & from_20_before_start & below_600 & is_1o1m]


fig, ax_array = plt.subplots(1,2, figsize=(6,1.5))

for ax, samples in zip(ax_array, (df_moodle, df_camunda)):
    sns.boxplot(data=_around_migration(samples), x="real_tp", y="elapsed", hue="before_migration", whis=whis, showfliers=False, ax=ax)
    # Hide the per-panel legend and default labels; the shared ones are set below.
    ax.legend([],[], frameon=False)
    ax.set_ylabel("")
    ax.set_xlabel("")

moodle_ax, camunda_ax = ax_array[0], ax_array[1]

moodle_ax.set(ylim=(0, 500))
moodle_ax.set_yticks([0,100,200,300,400, 500])
moodle_ax.set_title("Iomad")
moodle_ax.set_ylabel("Response time (in ms)")
moodle_ax.set_xlabel("Input query throughput (in RPS)")

camunda_ax.set(ylim=(0, 75))
camunda_ax.set_yticks([0,25,50,75])
camunda_ax.set_title("Camunda")
camunda_ax.set_xlabel("Input query throughput (in RPS)")

plt.savefig('figures/1O1M_colocated.pdf', bbox_inches = "tight")

<Figure size 432x108 with 2 Axes>

# Synchronize figures with the paper

In [43]:
# Copy every PDF in figures/ into the paper's figures/results directory.
!cp figures/*.pdf ../../paper/figures/results