## Understanding the cloud lab data

In [4]:
import os, sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.ticker as mticker
import re
import hashlib
import time
import datetime as dt
from pandas.tseries.offsets import BDay
from matplotlib.lines import Line2D
import multiprocessing as mp
import calendar
from dateutil.relativedelta import relativedelta

# Makes the quality of display plot better: retina quality
%config InlineBackend.figure_format = 'retina'

# Allows setting values in dataframe copies without warnings (default='warn')
pd.options.mode.chained_assignment = None


source_dir = "data/"

if not os.path.isdir(source_dir):
    os.mkdir(dest_dir)
    
db = {}
for f in os.listdir(source_dir):
    if ".csv" in f:
        db[os.path.splitext(f)[0]] = pd.read_csv(source_dir + f, low_memory=False)    

# Print the tables in each csv        
#        for k, df in db.items():
#            print ("Table/dataframe: %s (Length: %d)" %(k, len(df)))
#            #print (("\t"  +  "\n\t").join(sorted(df.columns.tolist())), "\n")
#            print (("\t"),("\n" "\t ").join(sorted(df.columns.tolist())), "\n")
    
%run graphics.ipynb

## Total Hardware Information

In [144]:
nodes_df=pd.read_csv(source_dir + "nodes.csv")
hw_inception = nodes_df[["type", "inception"]]
#nodes_df 
#print(hw_inception.to_string())

# Get the timestamp when each type of node was available for use
hw_inception = hw_inception.groupby("type").agg(min)

# Nodes type and count with no inception date
hw_no_inception_count = hw_inception["inception"].isna().groupby("type").agg(sum)
#print(hw_no_inception_count.to_string()) # Uncomment to see the output




In [145]:
# Get the quantity of nodes per type for each starting time
hw_inception = hw_inception[~hw_inception["inception"].isna()]
hw_inception.reset_index(inplace=True)
hw_inception.head()

Unnamed: 0,type,inception
0,C240M4,2015-03-02 23:24:49
1,bbg,2013-03-11 21:45:19
2,blockstore,2016-03-01 16:14:34
3,c220g1,2016-04-15 13:20:23
4,c220g2,2016-03-11 02:34:39


In [146]:
nodes_count = pd.DataFrame(nodes_df.type.value_counts())
#nodes_count = nodes_df.type.value_counts()

#nodes_count = nodes_df.groupby("type")
#print(nodes_count.to_string())
#nodes_count.columns = ["count"]
type(nodes_count)
nodes_count.reset_index(inplace=True)
nodes_count.columns = ["type", "count"]
display(nodes_count.T)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,96,97,98,99,100,101,102,103,104,105
type,pcvm,m400,m510,c220g5,xl170,c220g2,d710,d430,pc3000,r320,...,dell2550,pm-mote,n310,d2950,apcon,d530,nuc8559,arista7500,dl360,pcivy
count,344,315,270,224,200,163,161,160,157,128,...,1,1,1,1,1,1,1,1,1,1


In [148]:
node_counts = pd.merge(node_counts, hw_inception, left_index=True, right_index=True)
#node_counts = pd.merge(node_counts, hw_inception, left_on, right_index=True)
#node_counts = pd.merge(node_counts, hw_inception)
node_counts.head()

Unnamed: 0,type_x,inception_x,inception_y,inception_x.1,type_y,inception_y.1
0,344,2016-02-19 14:10:13,2016-02-19 14:10:13,2016-02-19 14:10:13,C240M4,2015-03-02 23:24:49
1,315,2014-11-22 22:39:51,2014-11-22 22:39:51,2014-11-22 22:39:51,bbg,2013-03-11 21:45:19
2,270,2016-04-29 15:36:32,2016-04-29 15:36:32,2016-04-29 15:36:32,blockstore,2016-03-01 16:14:34
3,224,2018-02-17 22:33:33,2018-02-17 22:33:33,2018-02-17 22:33:33,c220g1,2016-04-15 13:20:23
4,200,2018-01-25 19:19:30,2018-01-25 19:19:30,2018-01-25 19:19:30,c220g2,2016-03-11 02:34:39


In [150]:
all_types = node_counts[(node_counts.index != "pcvm")]
all_types

Unnamed: 0,type_x,inception_x,inception_y,inception_x.1,type_y,inception_y.1
0,344,2016-02-19 14:10:13,2016-02-19 14:10:13,2016-02-19 14:10:13,C240M4,2015-03-02 23:24:49
1,315,2014-11-22 22:39:51,2014-11-22 22:39:51,2014-11-22 22:39:51,bbg,2013-03-11 21:45:19
2,270,2016-04-29 15:36:32,2016-04-29 15:36:32,2016-04-29 15:36:32,blockstore,2016-03-01 16:14:34
3,224,2018-02-17 22:33:33,2018-02-17 22:33:33,2018-02-17 22:33:33,c220g1,2016-04-15 13:20:23
4,200,2018-01-25 19:19:30,2018-01-25 19:19:30,2018-01-25 19:19:30,c220g2,2016-03-11 02:34:39
...,...,...,...,...,...,...
64,1,2018-05-22 14:13:06,2018-05-22 14:13:06,2018-05-22 14:13:06,r720,2014-09-25 12:26:55
65,1,2016-10-18 14:51:32,2016-10-18 14:51:32,2016-10-18 14:51:32,sdr,2017-08-29 11:55:02
66,1,2018-09-12 17:00:55,2018-09-12 17:00:55,2018-09-12 17:00:55,spp,2009-12-08 10:59:36
67,1,2017-12-29 11:58:23,2017-12-29 11:58:23,2017-12-29 11:58:23,x310,2019-03-26 13:29:46
