# Analysis of the Datapoints Dataframe

## Imports and reading


In [58]:
import pandas as pd
from pathlib import Path
from utils.utils import print_pretty_df

# ANSI escape sequences used to colour console output:
# r = red, y = yellow, g = green, b = blue, e = reset to the default style.
r, y, g, b, e = "\033[31m", "\033[33m", "\033[32m", "\033[34m", "\033[0m"

# Location of the pickled datapoints dataframe, relative to the working directory.
pickleName = "all_datapoints.pkl"
datapointsDfPath = Path("..", "data", "Dataframes", pickleName)
# NOTE(review): unpickling can execute arbitrary code; only load pickles from a trusted source.
datapointsDf = pd.read_pickle(datapointsDfPath)

In [59]:
# Disabled sanity checks: total number of datapoints, and number of datapoints
# whose "Task" is "Classification" or "Object detection".
# print(len(datapointsDf))
# print(len(datapointsDf[datapointsDf["Task"].isin(["Classification", "Object detection"])]))
# Display the whole datapoints dataframe with the project's table printer.
print_pretty_df(datapointsDf)

+----+-------------------------------------------------------+-------------------------------+------------------+----------------+----------------+---------------------------------------------------+----------------------+-----------------------------------------+------------------------------------------------+----------------------+------------------+-----------+------------+-------------+-----------+--------------+-------------------+------------+-------------+-------------------------------------+----------------+-----------------------------------------+-------------------------------------------------------------+-------------------------------------------+------------+----------+----------+---------------------+
|    |                   BBT Citation Key                    |             Model             | Equivalent model |    Backbone    |    Modality    |                      Dataset                      |         Task         |               Application               |       

## Quick statistics / Overview


### Implementation means

Typical implementation tags look like `'RTL design (VHDL)'` or `'Vitis AI (v1.4)'`.
When grouping them by "family", I discard the language or version information given in parentheses.


In [60]:
# --- Raw count of every full implementation tag ---
implementationCounts = datapointsDf["Implementation"].value_counts()
print(implementationCounts, end="\n\n")

# --- Group by "family": drop the version or language given in parentheses ---
def determine_impl_group(index: str) -> str:
    """Return the text of an implementation tag that precedes its first "(", stripped of surrounding whitespace."""
    family, _, _ = index.partition("(")
    return family.strip()
# groupby() called with a function applies it to every index label (here: the tags)
implementationGrouped = implementationCounts.groupby(determine_impl_group).sum()
print(implementationGrouped.sort_values(ascending=False))

Implementation
RTL design (VHDL)       13
RTL design (N/A)         7
RTL design (Verilog)     7
N/A                      7
Vitis AI (N/A)           6
HLS (Vitis)              6
FINN                     4
Vitis AI (DNNDK)         4
HLS (N/A)                3
RTL design (XSG)         3
HLS (Vivado)             2
HLS (VGT)                1
Vitis AI (v2.5)          1
Vitis AI (v1.4)          1
HLS (MATLAB)             1
Name: count, dtype: int64

Implementation
RTL design    30
HLS           13
Vitis AI      12
N/A            7
FINN           4
Name: count, dtype: int64


### FPGA boards

A typical board tag looks like this: `'Zynq 7000 (Z7020) {Arty Z7}'`, i.e., `'<family> (<model>) {<evaluation board>}'`.

The following cell groups the boards by each of these 3 criteria.


In [61]:
# --- Raw count of the full board tags ---
boardCounts = datapointsDf["Board"].value_counts()
# print(boardCounts)
# print()

# --- Group by "family": drop the model (in parentheses) and the evaluation board (in curly braces) ---
def determine_board_family_group(index: str) -> str:
    """Return the text of a board tag that precedes its first "(", stripped of surrounding whitespace."""
    family, _, _ = index.partition("(")
    return family.strip()
# groupby() called with a function applies it to every index label (here: the board tags)
boardFamilyGrouped = boardCounts.groupby(determine_board_family_group).sum()
print(boardFamilyGrouped.sort_values(ascending=False), end="\n\n")

# -- Group by "board specific model", i.e., discard the family and the evaluation board name in curly braces ---
def determine_board_model_group(index: str) -> str:
    """Extract the model part of a board tag shaped like '<family> (<model>) {<evaluation board>}'.

    Args:
        index: Full board tag, e.g. 'Zynq 7000 (Z7020) {Arty Z7}'.

    Returns:
        The text between the first "(" and the following ")", stripped of
        surrounding whitespace, or "N/A" when the tag has no parenthesised
        part or when that part is empty.
    """
    _, opening, remainder = index.partition("(")
    if not opening:
        # No "(" at all: the tag carries no model information.
        return "N/A"
    # Cut at "{" first so a ")" inside the evaluation board name is never used,
    # then cut at ")" instead of blindly dropping the last character.
    model = remainder.split("{")[0].split(")")[0].strip()
    return model if model else "N/A"
# Sum the tag counts per board model and print the models from most to least frequent,
# followed by an empty separator line.
boardModelGrouped = boardCounts.groupby(determine_board_model_group).sum()
print(boardModelGrouped.sort_values(ascending=False), end="\n\n")

# -- Group by "evaluation board/kit" (the name in between curly braces) ---
def determine_board_eval_group(index: str) -> str:
    """Extract the evaluation board/kit part of a board tag.

    Args:
        index: Full board tag, e.g. 'Zynq 7000 (Z7020) {Arty Z7}'.

    Returns:
        The text between the first "{" and the following "}", stripped of
        surrounding whitespace. When the tag has no curly-brace part, or the
        braces are empty, the placeholder label "N/A   " is returned.
    """
    _, opening, remainder = index.partition("{")
    # Cut at "}" rather than dropping the last character, so text after the
    # closing brace (e.g. trailing whitespace) does not corrupt the name.
    boardKit: str = remainder.split("}")[0].strip() if opening else ""
    # The placeholder is kept verbatim (trailing spaces included) so existing group labels do not change.
    return boardKit if boardKit else "N/A   "
# Sum the tag counts per evaluation board/kit (groupby applies the function to each index label).
boardKitGrouped = boardCounts.groupby(determine_board_eval_group).sum()
# Print the groups from most to least frequent.
print(boardKitGrouped.sort_values(ascending=False))

Board
Zynq US+      24
Zynq 7000     15
Virtex-7      10
Artix-7        6
Kintex-7       3
Virtex-6       3
Kintex US      2
Alveo          1
Cyclone V      1
Spartan-3A     1
Name: count, dtype: int64

Board
XC7VX690T     10
XC7Z020       10
XCZU3EG        8
XCZU7EV        7
XCZU9EG        4
XC7A35T        4
XC6VLX240T     3
XC7K325T       3
XCZU15EG       3
XC7Z045        2
XCKU040        2
XC7A200T       2
XCZU19EG       1
5CSXC6         1
XCK26          1
XC7Z100        1
U280           1
XC7Z035        1
XC3SD1800A     1
Z7020          1
Name: count, dtype: int64

Board
N/A                     17
VC709                    7
ZCU104                   6
UltraZed-EG              4
PYNQ-Z1                  3
Arty-35T                 3
ZCU102                   3
OVC3                     3
Alinx AXU15EG            3
ZC706                    2
Z-turn                   2
AC701                    2
KV260                    2
KCU105                   2
KC705                    2
Ultra96      

### Model data

Model information is already split in 3 Series: `'Model'`, `'Equivalent model'` and `'Backbone'`.
I think grouping by `'Model'`, i.e., the name given to the model in the article, makes little sense. However, even though `'Equivalent model'` is a subjective tag that I assigned myself, it conveys interesting information.
Same for `'Backbone'`.


In [62]:
# ----- Preview (at most 10 rows) of the three model-related columns -----
print_pretty_df(
    datapointsDf[["Model", "Equivalent model", "Backbone"]],
    max_rows=10,
)

# ----- Occurrences of each "Equivalent model" tag -----
equivalentModelCounts = datapointsDf["Equivalent model"].value_counts()
print(equivalentModelCounts, end="\n\n")

# ----- Occurrences of each "Backbone" tag; the empty tag is relabelled "N/A" -----
backbineCounts = datapointsDf["Backbone"].value_counts().rename({"": "N/A"})
print(backbineCounts)

+---+---------------------+------------------+----------------+
|   |        Model        | Equivalent model |    Backbone    |
+---+---------------------+------------------+----------------+
| 0 |         CNN         |       CNN        |                |
| 1 | TriCloudNet + U-Net |       CNN        |   SqueezeNet   |
| 2 |       YOLOv4        |      YOLOv4      |   Darknet53    |
| 3 |   YOLOv4-tiny 3L    |      YOLOv4      | Darknet53-tiny |
| 4 |      BRAM_DSP       |   Fuzzy ARTMAP   |                |
| 5 |       LUT_MUL       |   Fuzzy ARTMAP   |                |
| 6 |     LeNet-5 f32     |     LeNet-5      |    LeNet-5     |
| 7 |     LeNet-5 i8      |     LeNet-5      |    LeNet-5     |
| 8 |    Decision Tree    |        ML        |                |
| 9 |       2D CNN        |       CNN        |                |
+---+---------------------+------------------+----------------+
Equivalent model
CNN             28
ML               7
YOLOv2           7
YOLOv4           4
SSD        

In [63]:
# ----- Find which experiments rely on one of the listed architecture keywords -----
# An experiment matches a keyword when its "Model", "Equivalent model" or "Backbone"
# tag contains it (case-sensitive substring match).
matchedPositions = set()  # row positions matched by at least one keyword
for strLookFor in ["Mobile", "tiny", "Squeeze", "Ghost", "ENet", "ESP"]:
    nb = 0
    for position, (_, row) in enumerate(datapointsDf.iterrows()):
        if any(strLookFor in row[column] for column in ("Model", "Equivalent model", "Backbone")):
            print(row["Model"], row["Equivalent model"], row["Backbone"])
            matchedPositions.add(position)
            nb += 1
    print(f"Number of experiments with {b}{strLookFor} = {nb}{e}")
# Count every experiment once, even when it matches several keywords; summing the
# per-keyword counts would inflate the total and the percentage below.
nbTotal = len(matchedPositions)
print(f"Total = {r}{nbTotal}{e}/{len(datapointsDf)}{e}, or {r}{nbTotal / len(datapointsDf) * 100:.2f}%{e}")



YOLOv4-MobileNetv3 YOLOv4 MobileNetv3
MobileNetv1Lite CNN MobileNetv1
CBFF-SSD SSD MobileNetv1
RFA-YOLO YOLOv4 MobileNeXt
CNN2@0.7 YOLOv2 MobileNetv1
CNN4@0.7 YOLOv2 MobileNetv2
Number of experiments with [34mMobile = 6[0m
YOLOv4-tiny 3L YOLOv4 Darknet53-tiny
Improved YOLOv4-tiny YOLOv4-tiny CSPDarknet53
Number of experiments with [34mtiny = 2[0m
TriCloudNet + U-Net CNN SqueezeNet
CNN6@1.6 YOLOv2 SqueezeNet
Number of experiments with [34mSqueeze = 2[0m
Ghost-YOLOS YOLOv3 GhostNet
Number of experiments with [34mGhost = 1[0m
ENet CNN 
Number of experiments with [34mENet = 1[0m
ESPNet CNN 
Number of experiments with [34mESP = 1[0m
Total = [31m13[0m/66[0m, or [31m19.70%[0m


## Trying to find tendencies

### Are Vitis AI implementations consuming more power?


In [64]:
# Restrict the dataframe to the following devices and to Deep Learning models
similar_devices = ["XCZU", "XCKU", "XQRKU", "XC7VX", "XCK26"]
DL_models = ["CNN", "YOLO", "AlexNet", "SSD", "Hybrid"]
# The name fragments are joined into regex alternations ("A|B|C") for str.contains
devicePattern = "|".join(similar_devices)
modelPattern = "|".join(DL_models)

# --- Filter the dataframe: first by board, then by model / equivalent model / backbone ---
similarBoardDf = datapointsDf[datapointsDf["Board"].str.contains(devicePattern, na=False)]
isDeepLearning = (
    similarBoardDf["Model"].str.contains(modelPattern, na=False)
    | similarBoardDf["Equivalent model"].str.contains(modelPattern, na=False)
    | similarBoardDf["Backbone"].str.contains(modelPattern, na=False)
)
similarBoardAndModelsDf = similarBoardDf[isDeepLearning]
print(f"{len(similarBoardAndModelsDf)} experiments.")
# --- Print DF sorted by implementation ---
print_pretty_df(similarBoardAndModelsDf.sort_values(by="Implementation"))
# --- Split the experiments by implementation: "Vitis AI", "HLS" and "RTL" ---
vitisAiDf, hlsDf, rtlDf = (
    similarBoardAndModelsDf[similarBoardAndModelsDf["Implementation"].str.contains(family, na=False)]
    for family in ("Vitis AI", "HLS", "RTL")
)
# Average the "Power consumption" column of a dataframe; each cell is pre-processed
# by removing the "W" unit and converting the remainder to a float.
def compute_avg_power(df: pd.DataFrame) -> float:
    """Print the dataframe and return the mean of its numeric "Power consumption" cells.

    Args:
        df: Dataframe with a string column named "Power consumption"
            (cells such as "8.40 W").

    Returns:
        The mean of the cells that could be converted to a number; NaN when
        no cell is usable.
    """
    print_pretty_df(df)
    # regex=False treats "W" as a literal on every pandas version.
    power = df["Power consumption"].str.replace("W", "", regex=False).str.strip()
    # Empty, 'N/A' and any other non-numeric cells become NaN and are dropped,
    # instead of making the float conversion raise.
    power = pd.to_numeric(power, errors="coerce").dropna()
    print(power)
    return power.mean()
# Report the average power consumption of each implementation subset, in the same order as before
for label, subsetDf in (("Vitis AI", vitisAiDf), ("HLS", hlsDf), ("RTL", rtlDf)):
    print(f"Average power consumption for {label}: {compute_avg_power(subsetDf)}W")


32 experiments.
+----+-------------------------------------------------------+----------------------+------------------+----------------+----------+---------------------------------------------------+----------------------+-----------------------------------------+-------------------------------------+----------------------+------------------+-----------+------------+------------+-----------+--------------+-------------------+------------+-------------+-------------------------------------+----------+-----------------------------------------+-------------------------------------------------------------+-------------------------------+------------+----------+----------+---------------------+
|    |                   BBT Citation Key                    |        Model         | Equivalent model |    Backbone    | Modality |                      Dataset                      |         Task         |               Application               |                Board                |    Implement

### What is the best kind of design for YOLO models with similar complexities?


In [65]:
# Keep only the experiments with "YOLO" in their model name, equivalent model, or backbone
isYolo = (
    datapointsDf["Model"].str.contains("YOLO", na=False)
    | datapointsDf["Equivalent model"].str.contains("YOLO", na=False)
    | datapointsDf["Backbone"].str.contains("YOLO", na=False)
)
yoloDf = datapointsDf[isYolo]

def convert_complexity_to_float(val: str):
    """Convert a complexity tag such as "5.6GOP" into a plain number of operations.

    The tag is a number optionally followed by a magnitude letter and an
    optional "OP" suffix; whitespace between the parts is tolerated.
    Magnitudes: K = 1e3, M = 1e6, G = 1e9, B (billion) = 1e9.

    Args:
        val: Complexity tag, e.g. "300MOP", "5.6 GOP" or "42".

    Returns:
        The value as a float expressed in single operations (so "1GOP" gives
        1e9), or None when the tag is not a string or cannot be parsed
        (empty cell, "N/A", unknown unit, ...).
    """
    if not isinstance(val, str):
        # Missing cells (None / NaN) carry no complexity information.
        return None
    multipliers = {"K": 1e3, "M": 1e6, "G": 1e9, "B": 1e9}
    text = val.strip()
    if text.endswith("OP"):
        text = text[:-2].rstrip()
    scale = 1.0
    # text[-1:] is "" for an empty string, which is never a multiplier key.
    if text[-1:] in multipliers:
        scale = multipliers[text[-1]]
        text = text[:-1].rstrip()
    try:
        return float(text) * scale
    except ValueError:
        return None

def filter_complexity(df: pd.DataFrame, threshold: float) -> pd.DataFrame:
    """Return the rows of `df` whose parsed complexity is strictly above `threshold`.

    The "Complexity" column is parsed with convert_complexity_to_float and
    stored in a new "clean_complexity" column of the returned dataframe; rows
    whose complexity could not be parsed are discarded. `df` is not modified.
    """
    parsed = df["Complexity"].apply(convert_complexity_to_float)
    with_complexity = df.assign(clean_complexity=parsed).dropna(subset=["clean_complexity"])
    above_threshold = with_complexity["clean_complexity"] > threshold
    return with_complexity[above_threshold]

# Keep the YOLO experiments whose parsed complexity is strictly above the threshold.
# NOTE(review): the threshold is compared with the values returned by
# convert_complexity_to_float — confirm it is expressed on that function's scale.
filteredDf = filter_complexity(yoloDf, 1e6)
# Show the remaining experiments, ordered by their "Implementation" tag.
print_pretty_df(filteredDf.sort_values(by="Implementation"))

+----+------------------------------------------+-----------------+------------------+----------------+----------+------------------------------------+------------------+-------------------------+------------------------------+----------------------+------------------+-----------+----------+------------+-----------+--------------+-------------------+-----------+------------+-----------------+----------+------------------------------+---------------------------------------+-----------------------------+------------+----------+----------+-------------------+------------------+
|    |             BBT Citation Key             |      Model      | Equivalent model |    Backbone    | Modality |              Dataset               |       Task       |       Application       |            Board             |    Implementation    | Publication year |  Latency  |   FPS    | Task score | Footprint |  Throughput  | Power consumption | Frequency | Complexity |     Design      |  Memory  |          Pr

### What is the best study on DOTAv1.0, DIOR, or UP?


In [66]:
# Filter the experiments whose "Dataset" tag contains "DIOR"
# NOTE(review): "NWPU-RE" looks like a leftover alternative filter value — confirm.
datasetDf = datapointsDf[datapointsDf["Dataset"].str.contains("DIOR", na=False)]
# Show the matching experiments ordered by publication year
print_pretty_df(datasetDf.sort_values(by="Publication year"))

+----+----------------------------------------+--------------------+------------------+-------------+----------+-------------------------+------------------+-------------+-----------------------------+-----------------+------------------+---------+-----------+------------+-----------+------------+-------------------+-----------+------------+--------+--------+-----------+---------------+-----------------------------+------------+----------+----------+-------------------+
|    |            BBT Citation Key            |       Model        | Equivalent model |  Backbone   | Modality |         Dataset         |       Task       | Application |            Board            | Implementation  | Publication year | Latency |    FPS    | Task score | Footprint | Throughput | Power consumption | Frequency | Complexity | Design | Memory | Precision | Optimizations |          FPGA Util          | DPU Config | DPU Core | DPU Util | DPU Optimizations |
+----+----------------------------------------+---

In [67]:
# Select the experiments whose "Task" tag contains "Pixel".
# Disabled extra condition:  | datapointsDf["Application"].str.startswith("Ship", na=False)
taskDf = datapointsDf[datapointsDf["Task"].str.contains("Pixel", na=False)]
# Show them ordered by application, then print how many were selected
print_pretty_df(taskDf.sort_values(by="Application"))
print(len(taskDf))
# For reference: number of experiments per application over the whole dataframe
print(datapointsDf["Application"].value_counts())

+----+-----------------------------------------+---------------------------+------------------+----------+----------------+--------------------------------------------+----------------------+--------------------------------------+-------------------------------------+----------------------+------------------+-----------+-----+------------+-----------+-------------+-------------------+-----------+-------------+--------------------+----------+------------+---------------------------------------+-----------------------------+------------+----------+----------+-------------------+
|    |            BBT Citation Key             |           Model           | Equivalent model | Backbone |    Modality    |                  Dataset                   |         Task         |             Application              |                Board                |    Implementation    | Publication year |  Latency  | FPS | Task score | Footprint | Throughput  | Power consumption | Frequency | Complexity  |   

### Datasets, RS Applications and ML formulations

In our reporting method, each experiment is performed on a unique Dataset (i.e., we selected the most relevant/common dataset when authors reported results on several).
Each dataset is used (or even built) for a specific Remote Sensing application which is formulated as a Machine Learning problem or task.

The `Application` tag is not an exact science; it was kept in order to give some context.


In [68]:
# ----- All distinct ML problem formulations -----
mlTaskList = datapointsDf["Task"].unique()
# ----- Number of experiments per "Dataset" -----
DatasetCount = datapointsDf["Dataset"].value_counts()
# ----- Number of experiments per "Application" -----
applicationCount = datapointsDf["Application"].value_counts()

# Print the three summaries in that order
for summary in (mlTaskList, DatasetCount, applicationCount):
    print(summary)


['Classification' 'Object detection' 'Pixel classification' 'Regression'
 'Segmentation']
Dataset
University of Pavia {Pixel classification}           6
MSTAR {Classification}                               5
DOTAv1.0 {Object Detection}                          4
NWPU-RESISC45 {Classification}                       4
Potsdam {Segmentation}                               4
SSDD {Object Detection}                              3
AVIRIS-NG {Pixel classification}                     3
PennSyn2Real {Object Detection}                      3
UAV RGB (cust.) {Object Detection}                   3
Landsat-8 (cust.) {Classification}                   2
L8 Biome {Classification}                            2
RGB (cust.) {Classification}                         2
DIOR {Object Detection}                              2
MASATI {Classification}                              2
ALOS-2 (cust.) {Classification}                      2
UAV RGB (cust.) {Classification}                     2
UAV RGB (cust.) {Pixel

Print unique associations of Task and applications


In [69]:
# Show the board, implementation, model and task tags of every experiment, with rows ordered by "Board".
print_pretty_df(datapointsDf.sort_values(by="Board")[["Board", "Implementation", "Model", "Equivalent model", "Backbone", "Task", "Application"]])

+----+------------------------------------------------+----------------------+-------------------------------+------------------+----------------+----------------------+-----------------------------------------+
|    |                     Board                      |    Implementation    |             Model             | Equivalent model |    Backbone    |         Task         |               Application               |
+----+------------------------------------------------+----------------------+-------------------------------+------------------+----------------+----------------------+-----------------------------------------+
| 55 |                Alveo (U280) {}                 |      HLS (N/A)       |              GNN              |       GNN        |                |    Classification    |     Military targets identification     |
| 36 |           Artix-7 (XC7A200T) {AC701}           |  RTL design (VHDL)   |        Improved YOLOv2        |      YOLOv2      |   Darknet19    |   Obj

In [70]:
# Count each unique (Task, Modality, Application) combination
taskApplicationPairs = datapointsDf[["Task", "Modality", "Application"]].value_counts()
# Print the counts ordered by their (Task, Modality, Application) index instead of by frequency
print(taskApplicationPairs.sort_index())

Task                  Modality        Application                            
Classification        HSI             Air Quality Monitoring                     1
                                      Cloud coverage                             1
                      RGB             Cloud coverage                             6
                                      Landcover/Land use                         6
                                      Ship identification                        3
                      SAR             Military targets identification            5
                                      Ship identification                        2
Object detection      RGB             Aircraft detection                         2
                                      Diverse                                    9
                                      Flying-object detection                    5
                                      Railway track fastener defect detection    1
         

## Deeper analysis of common dataset and tasks among the dataframe

I try to check how many models (and papers) use the same datasets


In [71]:
# For the first 4 entries of DatasetCount, list every experiment run on that dataset:
# model name, citation key, board and implementation, followed by its reported metrics
for dataset in DatasetCount.index[:4]:
    print(f"\n{b}{dataset}{e} dataset:")
    datasetRows = datapointsDf[datapointsDf["Dataset"] == dataset]
    for _, row in datasetRows.iterrows():
        headerLine = f"    - {r}{row['Model']}{e} from {b}{row['BBT Citation Key']}{e} with board: {r}{row['Board']}{e} ({b}{row['Implementation']}{e}):"
        metricsLine = f"        Score: {g}{row['Task score']}{e}, Size: {g}{row['Footprint']}{e}, Latency: {g}{row['Latency']}{e}, Throughput: {g}{row['Throughput']}{e}, Power: {g}{row['Power consumption']}{e}"
        print(headerLine)
        print(metricsLine)


[34mUniversity of Pavia {Pixel classification}[0m dataset:
    - [31m2D CNN[0m from [34mheConfigurable2D3D2023a[0m with board: [31mZynq US+ (XCZU15EG) {Alinx AXU15EG}[0m ([34mRTL design (N/A)[0m):
        Score: [32m98.24% OA[0m, Size: [32m1.20 MB[0m, Latency: [32m0.097 ms*[0m, Throughput: [32m7.07 GOP/s[0m, Power: [32m8.40 W[0m
    - [31m3D CNN[0m from [34mheConfigurable2D3D2023a[0m with board: [31mZynq US+ (XCZU15EG) {Alinx AXU15EG}[0m ([34mRTL design (N/A)[0m):
        Score: [32m94.09% OA[0m, Size: [32m0.12 MB[0m, Latency: [32m1.11 ms*[0m, Throughput: [32m3.81 GOP/s[0m, Power: [32m8.40 W[0m
    - [31mHybridSN[0m from [34mheConfigurable2D3D2023a[0m with board: [31mZynq US+ (XCZU15EG) {Alinx AXU15EG}[0m ([34mRTL design (N/A)[0m):
        Score: [32m100% OA[0m, Size: [32m20.50 MB[0m, Latency: [32m7.71 ms*[0m, Throughput: [32m13.18 GOP/s[0m, Power: [32m8.40 W[0m
    - [31mLPDBL[0m from [34mshibiOnboardTargetDetection2021a[0m 

### Check common tasks


In [72]:
# For each Task, walk through the datasets tagged with that task and print every
# experiment (model, equivalent model, backbone, source, board) with its metrics
MLTaskCount = datapointsDf["Task"].value_counts()
for task, value in MLTaskCount.items():
    print(f"\n{b}{task}{e} task (total: {value}):")
    nbItems = 0
    for dataset in DatasetCount.index:
        # Dataset tags look like "<name> {<task>}"; the comparison ignores case
        tagParts = dataset.split("{")
        datasetTask = tagParts[1][:-1].strip()
        if datasetTask.upper() != task.upper():
            continue
        datasetName = tagParts[0].strip()
        print(f"    {b}{datasetName}{e}:")
        matching = datapointsDf[(datapointsDf["Task"] == task) & (datapointsDf["Dataset"] == dataset)]
        for _, row in matching.iterrows():
            print(
                f"        - {r}{row['Model']}{e} ({row['Equivalent model']}) [{y}{row['Backbone']}{e}] from {b}{row['BBT Citation Key']}{e} with board: {r}{row['Board']}{e} ({row['Implementation']}):"
            )
            print(
                f"            Score: {g}{row['Task score']}{e}, Size: {g}{row['Footprint']}{e}, Latency: {g}{row['Latency']}{e}, Throughput: {g}{row['Throughput']}{e}, Power: {g}{row['Power consumption']}{e}"
            )
        nbItems += len(matching)

    print(f"    Total: {nbItems}")


[34mClassification[0m task (total: 24):
    [34mMSTAR[0m:
        - [31mLeNet-5 f32[0m (LeNet-5) [[33mLeNet-5[0m] from [34mweiFPGABasedHybridTypeImplementation2019[0m with board: [31mKintex-7 (XC7K325T) {KC705}[0m (RTL design (N/A)):
            Score: [32m98.76% OA[0m, Size: [32m6.64 MB[0m, Latency: [32m2.29 ms[0m, Throughput: [32m[0m, Power: [32m[0m
        - [31mLeNet-5 i8[0m (LeNet-5) [[33mLeNet-5[0m] from [34mweiFPGABasedHybridTypeImplementation2019[0m with board: [31mKintex-7 (XC7K325T) {KC705}[0m (RTL design (N/A)):
            Score: [32m97.77% OA[0m, Size: [32m1.66 MB[0m, Latency: [32m2.29 ms[0m, Throughput: [32m[0m, Power: [32m[0m
        - [31mLeNet-5[0m (LeNet-5) [[33mLeNet-5[0m] from [34mchenHardwareImplementationConvolutional2020[0m with board: [31mKintex-7 (XC7K325T) {}[0m (N/A):
            Score: [32m98.18% OA[0m, Size: [32m[0m, Latency: [32m2.29 ms[0m, Throughput: [32m[0m, Power: [32m[0m
        - [31mGNN[0m

## Analyze reporting: missing metrics


In [73]:
def is_undefined(item) -> bool:
    """Tell whether a dataframe cell holds no usable information.

    Args:
        item: A cell value: a string, a list/tuple of cell values, None or NaN.

    Returns:
        True for None, NaN, the empty string, and strings starting with "N/A"
        or "???". A list/tuple is undefined when all of its items are
        (an empty one is therefore undefined). False otherwise.

    Raises:
        ValueError: If `item` is of any other type.
    """
    # Missing cells show up as None or float NaN; NaN is the only float that
    # differs from itself, which avoids an extra import for the check.
    if item is None or (isinstance(item, float) and item != item):
        return True
    if isinstance(item, str):
        return item == "" or item.startswith(("N/A", "???"))
    if isinstance(item, (list, tuple)):
        return all(is_undefined(subitem) for subitem in item)
    raise ValueError(f"Unsupported type: {type(item)}")
    
# ---  Report every datapoint whose Model, Dataset, Board or Task is undefined ---
for index, article in datapointsDf.iterrows():
    for field in ("Model", "Dataset", "Board", "Task"):
        if is_undefined(article[field]):
            print(f"Item N°{b}{index}{e} has no {field}")

### Check per article: Which articles miss the most metrics

#### First for the "performance" metrics


In [74]:
performanceMetrics: list[str] = [
    "Latency",
    "Task score",
    "Footprint",
    "Throughput",
    # "Frequency",
    "Complexity",
    "Power consumption",
]

# --- Per experiment: number of undefined performance metrics ---
# Stored in a new "Missing perf metrics" column of the dataframe.
datapointsDf["Missing perf metrics"] = datapointsDf.apply(
    lambda article: sum(is_undefined(article[metric]) for metric in performanceMetrics),
    axis=1,
)
missingPerExperiment = datapointsDf["Missing perf metrics"]
# Number of experiments missing at least one metric, over the number of experiments
print(f"{(missingPerExperiment >= 1).sum()}/{len(datapointsDf)}")
# Average number of missing metrics per experiment
print(f"Average number of missing metrics: {missingPerExperiment.mean()}")

# --- Per study: MEAN number of missing metrics over the experiments of each citation key ---
missing_per_study = datapointsDf.groupby("BBT Citation Key", as_index=False)["Missing perf metrics"].mean()
studyMeans = missing_per_study["Missing perf metrics"]
nbStudies = len(missing_per_study)
# Number of studies whose per-study mean reaches 1, 2 and 3 missing metrics
for minimumMean, label in (
    (1, "studies have missing metrics"),
    (2, "studies have more than 1 missing metrics"),
    (3, "studies have more than 2 missing metrics"),
):
    print(f"{(studyMeans >= minimumMean).sum()}/{nbStudies} {label}")
# Average of the per-study means
print(f"Average number of missing metrics (per study): {studyMeans.mean()}")

54/66
Average number of missing metrics: 2.4393939393939394
41/46 studies have missing metrics
35/46 studies have more than 1 missing metrics
27/46 studies have more than 2 missing metrics
Average number of missing metrics (per study): 2.75


#### Then for the "FPGA" metrics


In [75]:
FPGAMetrics: list[str] = ["Design", "Memory", "Precision", "Optimizations", "FPGA Util"]
DPUMetrics: list[str] = ["Precision", "DPU Config", "DPU Core", "DPU Optimizations", "DPU Util"]

# --- Per experiment: number of undefined metrics in each group ---
# One new dataframe column per group, filled in this order: FPGA, then DPU.
for columnName, metricGroup in (
    ("Missing fpga metrics", FPGAMetrics),
    ("Missing dpu metrics", DPUMetrics),
):
    datapointsDf[columnName] = datapointsDf.apply(
        lambda article: sum(is_undefined(article[metric]) for metric in metricGroup),
        axis=1,
    )
# Print the 'BBT Citation Key' and both counters for every experiment (no filtering is applied)
print_pretty_df(datapointsDf[["BBT Citation Key", "Missing fpga metrics", "Missing dpu metrics"]])

+----+-------------------------------------------------------+----------------------+---------------------+
|    |                   BBT Citation Key                    | Missing fpga metrics | Missing dpu metrics |
+----+-------------------------------------------------------+----------------------+---------------------+
| 0  |          vitoloRealTimeOnboardSatellite2024           |          0           |          4          |
| 1  |                 kimOnOrbitAICloud2024                 |          1           |          5          |
| 2  |         nguyenFPGASoCImplementationYOLOv42024         |          4           |          3          |
| 3  |         nguyenFPGASoCImplementationYOLOv42024         |          4           |          3          |
| 4  |        yahiaouiParallelizationFuzzyARTMAP2017a        |          0           |          4          |
| 5  |        yahiaouiParallelizationFuzzyARTMAP2017a        |          0           |          4          |
| 6  |       weiFPGABasedHyb

### Check per metric: Which metrics are the least reported


In [76]:
# "Precision" is listed in both FPGAMetrics and DPUMetrics; dict.fromkeys removes the
# duplicate while keeping first-seen order, so each metric is reported exactly once.
allMetrics = list(dict.fromkeys(performanceMetrics + FPGAMetrics + DPUMetrics))
for metric in allMetrics:
    # Number of experiments whose cell for this metric is undefined
    missing_metrics = int(datapointsDf[metric].map(is_undefined).sum())

    print(f"{r}{missing_metrics}{e} models miss the {b}{metric}{e} metric.")

[31m23[0m models miss the [34mLatency[0m metric.
[31m6[0m models miss the [34mTask score[0m metric.
[31m36[0m models miss the [34mFootprint[0m metric.
[31m42[0m models miss the [34mThroughput[0m metric.
[31m36[0m models miss the [34mComplexity[0m metric.
[31m18[0m models miss the [34mPower consumption[0m metric.
[31m17[0m models miss the [34mDesign[0m metric.
[31m17[0m models miss the [34mMemory[0m metric.
[31m2[0m models miss the [34mPrecision[0m metric.
[31m26[0m models miss the [34mOptimizations[0m metric.
[31m14[0m models miss the [34mFPGA Util[0m metric.
[31m2[0m models miss the [34mPrecision[0m metric.
[31m56[0m models miss the [34mDPU Config[0m metric.
[31m63[0m models miss the [34mDPU Core[0m metric.
[31m61[0m models miss the [34mDPU Optimizations[0m metric.
[31m62[0m models miss the [34mDPU Util[0m metric.
