In [1]:
# Import necessary tools
import numpy  as np
import pandas as pd
import scipy  as sp
from os import getenv

In [2]:
# Set variables
WD = %pwd

SRC_DIR = WD + "/.."
BUILD_DIR = WD + "/../../build"
OUT_DIR=BUILD_DIR + "/statistics/analysis" # Where we will place the analysis' results
CSV_DIR=OUT_DIR + "/csv" # Where we palce the CSV subfiles

%env SRC_DIR=$SRC_DIR
%env BUILD_DIR=$BUILD_DIR
%env OUT_DIR=$OUT_DIR
%env CSV_DIR=$CSV_DIR

env: SRC_DIR=/home/anthonyd973/Git/Git_Projects/swarmlist-list-based/src/statistics/..
env: BUILD_DIR=/home/anthonyd973/Git/Git_Projects/swarmlist-list-based/src/statistics/../../build
env: OUT_DIR=/home/anthonyd973/Git/Git_Projects/swarmlist-list-based/src/statistics/../../build/statistics/analysis
env: CSV_DIR=/home/anthonyd973/Git/Git_Projects/swarmlist-list-based/src/statistics/../../build/statistics/analysis/csv


# Data fetching

- Extract the bzipped results archive. **You may instead put your own results under `<git's root>/build/experiment`**, in which case the extraction step is skipped.

In [3]:
%%bash

# Results are already in place (extracted earlier, or supplied by the user):
# leave them untouched.
if [ -e "$BUILD_DIR/experiment" ]; then exit 0; fi

# Otherwise unpack the bundled bzip2-compressed tarball into the build directory.
results_archive="$SRC_DIR/statistics/results.tbz"

mkdir -p "$BUILD_DIR"
tar -xjf "$results_archive" -C "$BUILD_DIR"

- Split the result CSV file into many smaller CSV files. Each smaller file contains the data of one `{topology, drop rate, number of robots}` experiment configuration.

In [4]:
%%bash

# Split the result CSV file into one CSV subfile per {topology, drop rate,
# number of robots} configuration, written to "$CSV_DIR" as
# <topology>T_<drop>D_<numRobots>R.csv.

RES_IN="$BUILD_DIR/experiment/res.csv" # Result CSV file that we use as input
if [ ! -f "$RES_IN" ]; then echo "$RES_IN: File not found." >&2; exit 1; fi

mkdir -p "$OUT_DIR" "$CSV_DIR"

# The awk script below *appends* to the subfiles (every protocol contributes rows
# to the same subfile), so subfiles left over from a previous run must be removed
# first. Otherwise, re-running this cell would duplicate every row.
rm -f "$CSV_DIR"/*T_*D_*R.csv

#########################################

for protocol in consensus all adding removing
do
    for topology in line cluster scalefree
    do
        for drop in "0" "0.25" "0.5" "0.75"
        do
            # We assume that the result CSV's data is already sorted by topology, then by
            # drop rate, then by number of robots, then by consensus time. This can be done,
            # for instance, using LibreOffice Calc's sort tool.
            # We can therefore consider that all experiments of same {topology, drop, numRobots}
            # configuration succeed each other.
            #
            # NOTE(review): in the pattern below, the trailing "?" makes the last character
            # of the drop rate optional and the "." is an unescaped wildcard. Kept as-is;
            # confirm that this is intended.
            #
            # CSV_DIR is read through ENVIRON rather than spliced into the awk program text,
            # so that a path containing spaces, quotes or backslashes cannot break the script.
            grep -P "${protocol},${topology},${drop}?," "$RES_IN" |
                awk -F, -v filePrefix="${topology}T_${drop}D_" '
                    # Column 4 holds the number of robots. Switch output file each time it changes.
                    NR == 1 || $4 != numRobots {
                        if (currCsvFile != "") close(currCsvFile)
                        numRobots = $4
                        currCsvFile = ENVIRON["CSV_DIR"] "/" filePrefix numRobots "R.csv"
                    }
                    { print $0 >> currCsvFile }
                '
        done
    done
done

# Data analyzing

- Set up variables

In [10]:
# Names assigned, in order, to the columns of a result CSV file when it is loaded.
COLUMNS = [
    "Protocol",
    "Topology",
    "Packet drop rate",
    "Num. robots",
    "Consensus time",
    "Num. packets sent",
    "Num. packets received",
    "Mean sent bandwidth (B/(timestep*robot))",
    "Mean received bandwidth (B/(timestep*robot))",
]

In [16]:
# Load the subfile of one experiment configuration (scale-free topology, packet
# drop rate of 0, 5000 robots). Column names are supplied through COLUMNS.
# read_csv already returns a DataFrame, and CSV_DIR is the directory the subfiles
# were written to, so neither an extra DataFrame() wrapper nor a hand-built path
# is needed.
df = pd.read_csv(CSV_DIR + "/scalefreeT_0D_5000R.csv", names=COLUMNS)

# Preview the first rows only, rather than dumping the whole frame.
df.head(10)


Unnamed: 0,Protocol,Topology,Packet drop rate,Num. robots,Consensus time,Num. packets sent,Num. packets received,Mean sent bandwidth (B/(timestep*robot)),Mean received bandwidth (B/(timestep*robot))
0,,,,,,,,,
1,,,,,,,,,
2,,,,,,,,,
3,,,,,,,,,
4,,,,,,,,,
5,,,,,,,,,
6,,,,,,,,,
7,,,,,,,,,
8,,,,,,,,,
9,,,,,,,,,
