
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/BigDataWUR/AgML-CY-Bench/blob/main/notebooks/cybench_summary.ipynb)

# CY-Bench dataset summary

Based on a notebook contributed by Ioannis N. Athanasiadis.

## Install packages

In [None]:
# Third-party packages used below: plottable renders the summary tables and
# pycountry maps two-letter (alpha-2) country codes to country names.
%pip install --quiet plottable pycountry

## Load dataset summary

This dataset summary is produced by running `cybench/runs/benchmark_summary.py`. The input data for this notebook is included in `cybench/runs/data_size_v1.4-per-year.txt`.

In [None]:
import os
import pandas as pd
import pycountry

DATA_SIZE_FILE = "data_size_v1.4-per-year.txt"


def _locate_data_size_file(file_name):
    """Return a path to ``file_name`` that exists on disk.

    The current working directory is tried first. The remaining candidates
    cover running from the repository root or from a directory that sits next
    to ``cybench/`` (the summary file is documented as living in
    ``cybench/runs/``).

    Raises:
        FileNotFoundError: if none of the candidate locations holds the file.
    """
    candidates = (
        file_name,
        os.path.join("cybench", "runs", file_name),
        os.path.join("..", "cybench", "runs", file_name),
    )
    for candidate in candidates:
        if os.path.isfile(candidate):
            return candidate
    raise FileNotFoundError(
        "Could not find " + file_name + "; looked in: " + ", ".join(candidates)
    )


def _parse_data_size_line(line):
    """Summarise one ``<dataset name>:<year entries>`` line.

    The last two characters of the dataset name are taken as the country code.
    Every comma-separated year entry is read as a four-character year, one
    separator character, an integer count and one closing character; e.g. an
    entry shaped like ``2003(12)`` yields year ``"2003"`` and count ``12``.

    Returns:
        A ``(cn_code, row)`` tuple where ``row`` is
        ``[min_max_year, nloc, ndata_points, ds_size_row]``:
        ``min_max_year`` joins the first and last year with a positive count,
        ``nloc`` is the largest yearly count, ``ndata_points`` is the sum of
        all yearly counts and ``ds_size_row`` lists the counts in file order.
    """
    ds_name, ds_summary = line.split(":", 1)
    cn_code = ds_name.strip()[-2:]
    # Greece seems to have 2 codes. EU uses EL. The more common is GR.
    if cn_code == "EL":
        cn_code = "GR"

    years = []
    ds_size_row = []
    for yr_summary in ds_summary.split(","):
        # remove extra spaces (and the trailing newline of the last entry)
        yr_summary = yr_summary.strip()
        if not yr_summary:
            # tolerate a trailing comma instead of failing in int("")
            continue
        years.append(yr_summary[:4])
        ds_size_row.append(int(yr_summary[5:-1]))

    years_with_data = [yr for yr, count in zip(years, ds_size_row) if count > 0]
    min_year = years_with_data[0] if years_with_data else None
    max_year = years_with_data[-1] if years_with_data else None
    min_max_year = str(min_year) + "-" + str(max_year)

    # The leading 0 keeps the result at 0 when there are no entries.
    nloc = max([0, *ds_size_row])
    ndata_points = sum(ds_size_row)
    return cn_code, [min_max_year, nloc, ndata_points, ds_size_row]


# Per-crop summaries keyed by country code.
maize_data_sizes = {}
wheat_data_sizes = {}
with open(_locate_data_size_file(DATA_SIZE_FILE), "r") as f:
    for line in f:
        if not line.strip():
            # blank lines (e.g. at the end of the file) carry no dataset
            continue
        cn_code, summary_row = _parse_data_size_line(line)
        # Anything that is not a maize line is treated as wheat.
        if line.startswith("maize"):
            maize_data_sizes[cn_code] = summary_row
        else:
            wheat_data_sizes[cn_code] = summary_row


def _country_name(alpha_2):
    """Return the pycountry name for ``alpha_2``, or the code itself if unknown.

    Falling back to the code keeps one unexpected country code from aborting
    the whole summary.
    """
    try:
        country = pycountry.countries.get(alpha_2=alpha_2)
    except LookupError:
        country = None
    return country.name if country is not None else alpha_2


def _build_summary_frame(data_sizes, crop):
    """Turn a ``{country_code: [duration, regions, points, densities]}`` dict
    into a DataFrame indexed by country name.

    The resulting columns are ``country_code`` followed by ``duration_<crop>``,
    ``regions_<crop>``, ``points_<crop>`` and ``densities_<crop>``.
    """
    value_columns = [
        "duration_" + crop,
        "regions_" + crop,
        "points_" + crop,
        "densities_" + crop,
    ]
    frame = (
        pd.DataFrame.from_dict(data_sizes, columns=value_columns, orient="index")
        .rename_axis("country_code")
        .reset_index()
    )
    frame["country_name"] = frame["country_code"].map(_country_name)
    return frame.set_index("country_name")


maize_ds = _build_summary_frame(maize_data_sizes, "maize")
wheat_ds = _build_summary_frame(wheat_data_sizes, "wheat")

print(maize_ds.head(40).to_string())
print(wheat_ds.head(40).to_string())

## Plot summaries

In [None]:
import matplotlib.axes
import typing

def densities_plot(ax: matplotlib.axes.Axes, val: typing.Any, hideAxes=True, startx=2003) -> matplotlib.container.BarContainer:
    """Draw ``val`` as a bar chart on ``ax``, one bar per consecutive x position.

    Args:
        ax: Axes to draw on.
        val: Sequence of bar heights; its length sets the number of bars.
        hideAxes: When true, the axes are switched off via ``set_axis_off``.
        startx: x position of the first bar; later bars follow in steps of 1.

    Returns:
        Whatever ``ax.bar`` returns for the drawn bars.
    """
    x_positions = range(startx, startx + len(val))
    bars = ax.bar(x_positions, val, color="#35b0ab")
    if hideAxes:
        ax.set_axis_off()
    return bars

In [None]:
# Install some nice fonts! pyfonts loads the font files and pooch fetches them.
# Note: the magic must be written `%pip` with no space after the percent sign;
# `% pip` is not recognised as a line magic by current IPython.
%pip install --quiet pyfonts pooch

from pyfonts import load_font
import pooch

def get_font(font_url):
    """Fetch the font file at ``font_url`` with pooch and load it with pyfonts.

    NOTE(review): ``known_hash=None`` means the downloaded file is not checked
    against a known hash.
    """
    return load_font(font_path=pooch.retrieve(url=font_url, known_hash=None))

# Roboto Light and Roboto Black, fetched from fontiko.com through get_font.
# light_font is passed as the text font of the summary tables below;
# bold_font is loaded but not applied by the table cells below.
light_font = get_font("https://fontiko.com/system/uploads/font-files/94/Roboto-Light.ttf")
bold_font = get_font("https://fontiko.com/system/uploads/font-files/94/Roboto-Black.ttf")

In [None]:
from matplotlib.colors import LinearSegmentedColormap

# Four-stop colormap with 256 levels, ending in the same #35b0ab used for the
# timeline bars; it is passed to normed_cmap for the count columns of the tables.
cmap1 = LinearSegmentedColormap.from_list(
    name="bugw", colors=["#f2fbd2", "#c9ecb4", "#93d3ab", "#35b0ab"], N=256
)

### Maize data summary

In [None]:
# pyplot is imported here because no earlier cell brings `plt` into scope.
import matplotlib.pyplot as plt
from plottable import ColDef, Table
from plottable.cmap import normed_cmap

# One table row per country (the index of maize_ds), with all data columns
# grouped under a "Maize" header.
fig, ax = plt.subplots(figsize=(12, 30))

label_maize = "Maize"

tab = Table(
            maize_ds,
            # Draw on the axes created above instead of relying on the current axes.
            ax=ax,
            columns=['duration_maize','regions_maize','points_maize','densities_maize'],
            column_definitions=[
                # maize_ds is indexed by "country_name", so the definition for
                # the index column has to use that name to take effect.
                ColDef(name="country_name", title ="Country",
                        textprops={"ha": "left"},
                        width=1.5,
                       ),

                # Largest number of regions with labels in any single year.
                ColDef(name="regions_maize",
                       title="SUBNATIONAL\n ADMIN REGIONS",
                       width=1.5,
                       textprops={"ha": "center",
                                  "bbox": {"boxstyle": "circle", "pad": 0.35},
                                           },
                       cmap=normed_cmap(maize_ds['regions_maize'], cmap=cmap1, num_stds=5),
                       group=label_maize),

                # Total number of labels summed over all years.
                ColDef(name="points_maize",
                       title="LABEL SIZE",
                       width=0.75,
                       textprops={"ha": "center",
                                  "bbox": {"boxstyle": "circle", "pad": 0.35},},
                       cmap=normed_cmap(maize_ds['points_maize'], cmap=cmap1, num_stds=5),
                       group=label_maize),

                ColDef(name="duration_maize",
                       title="Min Max Year",
                       width=1.5,
                       group=label_maize),

                # Per-year label counts drawn as a small bar chart in each cell.
                ColDef("densities_maize", title="LABEL TIMELINE\n2003-2024", width=2,
                       group= label_maize,
                       plot_fn=densities_plot,
                       textprops={"ha": "center"}),
                ],
            row_dividers=True,
            col_label_divider=True,
            footer_divider=True,
            textprops={"fontsize": 12, "font":light_font, "ha":"center"},
            row_divider_kw={"linewidth": 1, "linestyle": (0, (1, 5))},
            col_label_divider_kw={"linewidth": 1, "linestyle": "-"},
            column_border_kw={"linewidth": 1, "linestyle": "-"},
        )

plt.show()

### Wheat data summary

In [None]:
# pyplot is imported here because no earlier cell brings `plt` into scope.
import matplotlib.pyplot as plt
from plottable import ColDef, Table
from plottable.cmap import normed_cmap

# One table row per country (the index of wheat_ds), with all data columns
# grouped under a "Wheat" header.
fig, ax = plt.subplots(figsize=(12, 24))

label_wheat = "Wheat"

tab = Table(
            wheat_ds,
            # Draw on the axes created above instead of relying on the current axes.
            ax=ax,
            columns=['duration_wheat', 'regions_wheat', 'points_wheat', 'densities_wheat'],
            column_definitions=[
                # wheat_ds is indexed by "country_name", so the definition for
                # the index column has to use that name to take effect.
                ColDef(name="country_name", title ="Country",
                        textprops={"ha": "left"},
                        width=1.5,
                       ),

                # Largest number of regions with labels in any single year.
                ColDef(name="regions_wheat",
                       title="SUBNATIONAL\n ADMIN REGIONS",
                       width=1.5,
                       textprops={"ha": "center",
                                  "bbox": {"boxstyle": "circle", "pad": 0.35},
                                           },
                       cmap=normed_cmap(wheat_ds['regions_wheat'], cmap=cmap1, num_stds=5),
                       group=label_wheat),

                # Total number of labels summed over all years.
                ColDef(name="points_wheat",
                       title="LABEL SIZE",
                       width=0.75,
                       textprops={"ha": "center",
                                  "bbox": {"boxstyle": "circle", "pad": 0.35},},
                       cmap=normed_cmap(wheat_ds['points_wheat'], cmap=cmap1),
                       group=label_wheat),

                ColDef(name="duration_wheat",
                       title="Min Max Year",
                       width=1.5,
                       group=label_wheat),

                # Per-year label counts drawn as a small bar chart in each cell;
                # the axes of these mini charts are left visible here.
                ColDef("densities_wheat", title="LABELS TIMELINE\n2003-2024", width=2,
                       group=label_wheat,
                       plot_fn=densities_plot,
                       plot_kw={"hideAxes": False},
                       textprops={"ha": "center"}),

                ],
            row_dividers=True,
            col_label_divider=True,
            footer_divider=True,
            textprops={"fontsize": 12, "font":light_font, "ha":"center"},
            row_divider_kw={"linewidth": 1, "linestyle": (0, (1, 5))},
            col_label_divider_kw={"linewidth": 1, "linestyle": "-"},
            column_border_kw={"linewidth": 1, "linestyle": "-"},
        )

plt.show()