# Qualitative analysis

# Read data

In [10]:
#@title Read all necessary data
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.colors import n_colors
import numpy as np
np.random.seed(1)

YEAR_START = 2013
YEAR_NOW = 2022

# The level-key files name their columns l1/l2/l3/task; the result files use
# the *_label spelling, so the key files are renamed before joining.
_KEY_COLUMN_NAMES = {
    "l1": "l1_label",
    "l2": "l2_label",
    "l3": "l3_label",
    "task": "task_label"
}
_JOIN_COLUMNS = ["l1_label", "l2_label", "l3_label", "task_label"]

# First data set (ITO_00101): results left-joined with their level keys.
df1 = pd.read_csv("data/all_ITO_00101.csv")
level_key = pd.read_csv("data/global_map_level_keys_ITO_00101.csv")
level_key = level_key.rename(columns=_KEY_COLUMN_NAMES)
df1 = pd.merge(df1, level_key, how="left", on=_JOIN_COLUMNS)

# Second data set (ITO_00141), handled the same way.
df2 = pd.read_csv("data/all_ITO_00141.csv")
level_key = pd.read_csv("data/global_map_level_keys_ITO_00141.csv")
level_key = level_key.rename(columns=_KEY_COLUMN_NAMES)
df2 = pd.merge(df2, level_key, how="left", on=_JOIN_COLUMNS)

# NOTE: `all` shadows the Python builtin of the same name; the name is kept
# because every later cell reads the combined frame through it.
all = pd.concat([df1, df2], ignore_index=True)
all = all.assign(
    date=lambda frame: pd.to_datetime(frame["date"]),
    year=lambda frame: frame["date"].dt.year,
    result=lambda frame: frame["result"].astype("float"),
)

# Rows that found no match in the level-key file get an empty key instead of
# NaN, so that later group-bys on "level_key" do not drop them.
without_key = all["level_key"].isna()
all.loc[without_key, "level_key"] = ""
all = all.drop(columns=["l1_label", "l2_label", "l3_label"])

# Separate charts

## New Benchmarks

Below chart depicts the number of new benchmarks every year (Different metrics of a benchmark are aggregated, meaning the year a benchmark is reported as NEW is the year **any** metric is first reported)

\# results > 3: benchmark has more than 3 reported results in at least one metric; examples: 
+ True if 4 results in accuracy, only 1 in recall
+ False if 2 results in accuracy, 2 in recall

In [11]:
#@title New benchmarks
all["year"] = all["date"].dt.year

# Columns identifying one metric series, and one benchmark (all of its metrics).
metric_keys = ["top_level_class_label", "task_label", "level_key", "dataset_label", "metric_label"]
benchmark_label_keys = ["top_level_class_label", "task_label", "level_key", "dataset_label"]
benchmark_keys = ["top_level_class"] + benchmark_label_keys

new = all.copy(deep=True)

# Flag rows of metric series that hold more than 3 results ...
new["# results > 3"] = True
idxe = all.groupby(metric_keys).filter(lambda x: len(x) <= 3).index
new.loc[list(idxe), "# results > 3"] = False
# ... then promote the flag to the whole benchmark (True as soon as any of its
# metrics qualifies), exactly as the SOTA cell does. Without this step the
# flag can differ between years of one benchmark, and the benchmark would be
# reported as "new" twice: once per flag value.
idxe = new.groupby(benchmark_label_keys)["# results > 3"].filter(lambda x: x.any()).index
new.loc[list(idxe), "# results > 3"] = True

# A benchmark is new in the first year in which any of its metrics is reported.
new = new.groupby(benchmark_keys + ["# results > 3"])["year"].min().reset_index()

# The current year is incomplete and therefore left out.
new = new[new["year"] < YEAR_NOW]

# Group by the plain column name: grouping by a one-element list yields
# 1-tuples as keys on pandas >= 2, which breaks the string handling of `tlc`.
for tlc, grp in new.groupby("top_level_class_label"):
    # group = pd.read_csv(f"trajectory_grouping_{grp.iloc[0]['top_level_class'].replace('https://identifiers.org/ito:', '')}.csv")
    # assign() builds a new frame instead of writing into the group slice.
    grp = grp.assign(year=grp["year"].astype(str))
    grp = grp.sort_values(by="# results > 3", ascending=False)

    # A benchmark with several superclasses appears once per level key;
    # dropping that column lets drop_duplicates() collapse the repeats.
    grp = grp.loc[:, grp.columns != "level_key"].drop_duplicates()

    # Tip: pass cumulative=True to plot the number of benchmarks created so far.
    fig = px.histogram(grp, x="year", title=f"New benchmarks: {tlc}", color="# results > 3").update_xaxes(categoryorder='category ascending')
    fig.write_image(f"artefacts/{tlc.replace(' ', '_').lower()}_new_benchmarks.png")
    fig.show()

## Disbanded benchmarks

Below chart depicts the number of disbanded benchmarks every year (Different metrics of a benchmark are aggregated, meaning the year a benchmark is reported as disbanded is the year **after** **any** metric is last reported)

Important **year after**

\# results > 3: benchmark has more than 3 reported results in at least one metric; examples: 
+ True if 4 results in accuracy, only 1 in recall
+ False if 2 results in accuracy, 2 in recall

In [12]:
#@title Disbanded benchmarks
# Columns identifying one metric series, and one benchmark (all of its metrics).
metric_keys = ["top_level_class_label", "task_label", "level_key", "dataset_label", "metric_label"]
benchmark_label_keys = ["top_level_class_label", "task_label", "level_key", "dataset_label"]
benchmark_keys = ["top_level_class"] + benchmark_label_keys

disbanded = all.copy(deep=True)

# Flag rows of metric series that hold more than 3 results ...
disbanded["# results > 3"] = True
idxe = all.groupby(metric_keys).filter(lambda x: len(x) <= 3).index
disbanded.loc[list(idxe), "# results > 3"] = False
# ... then promote the flag to the whole benchmark (True as soon as any of its
# metrics qualifies), exactly as the SOTA cell does. Without this step the
# flag can differ between years of one benchmark, and the benchmark would be
# reported as disbanded twice: once per flag value.
idxe = disbanded.groupby(benchmark_label_keys)["# results > 3"].filter(lambda x: x.any()).index
disbanded.loc[list(idxe), "# results > 3"] = True

# A benchmark counts as disbanded in the year AFTER its last reported result.
disbanded = disbanded.groupby(benchmark_keys + ["# results > 3"])["year"].max().reset_index()
disbanded["year"] = disbanded["year"] + 1

# Keep only disbanding years before the current year.
disbanded = disbanded[disbanded["year"] < YEAR_NOW]

# Group by the plain column name: grouping by a one-element list yields
# 1-tuples as keys on pandas >= 2, which breaks the string handling of `tlc`.
for tlc, grp in disbanded.groupby("top_level_class_label"):
    # group = pd.read_csv(f"trajectory_grouping_{grp.iloc[0]['top_level_class'].replace('https://identifiers.org/ito:', '')}.csv")
    # assign() builds a new frame instead of writing into the group slice.
    grp = grp.assign(year=grp["year"].astype(str))
    grp = grp.sort_values(by="# results > 3", ascending=False)

    # A benchmark with several superclasses appears once per level key;
    # dropping that column lets drop_duplicates() collapse the repeats.
    grp = grp.loc[:, grp.columns != "level_key"].drop_duplicates()

    fig = px.histogram(grp, x="year", title=f'Disbanded benchmarks: {tlc}', color="# results > 3").update_xaxes(categoryorder='category ascending')
    fig.write_image(f"artefacts/{tlc.replace(' ', '_').lower()}_disbanded_benchmarks.png")
    fig.show()

## SOTA analysis

Below chart depicts the number of benchmarks where SOTA results and non-SOTA results were reported. Note: This is not the number of SOTA or non-SOTA results reported every year, it is the number of benchmarks reporting SOTA and non-SOTA results. 

(Different metrics of a benchmark are aggregated, meaning the year a benchmark reports a SOTA is the year **any** metric is a SOTA)


\# results > 3: benchmark has more than 3 reported results in at least one metric; examples: 
+ True if 4 results in accuracy, only 1 in recall
+ False if 2 results in accuracy, 2 in recall

Note: First entries in benchmarks are not considered in this chart (neither sota nor non-sota, they are counted as "New benchmarks", see chart above)

In [13]:
#@title SOTA analysis

def is_sota(x):
    """Flag, per year, whether the yearly value set a new running maximum.

    Parameters
    ----------
    x : pandas.DataFrame
        One row per year with the columns ``year`` and ``result``. The rows
        may arrive in any order; they are sorted by year here, because the
        running maximum is only meaningful chronologically.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``year``, with a boolean ``result`` column that is True
        where the value is strictly above every earlier value. The earliest
        year is dropped: a first entry counts neither as SOTA nor as
        non-SOTA. The input frame is left unmodified.

    NOTE(review): larger values are treated as better (``cummax``); metrics
    where lower is better would need to be inverted by the caller - confirm.
    """
    # sort_values returns a new frame, so the caller's data stays untouched.
    a = x.sort_values(by="year", kind="stable")
    best_so_far = a["result"].cummax()
    # A repeated running maximum means this year did not improve on the best.
    a = a.assign(result=best_so_far.duplicated())
    # To count the first appearance as non-SOTA instead of skipping it, keep
    # the first row and set its flag to True before the inversion below.
    a = a.iloc[1:, :]
    a = a.set_index(["year"])
    return ~a

# new aggregation (count benchmarks only once)

# Columns identifying one metric series, and one benchmark (all of its metrics).
metric_keys = ["top_level_class_label", "task_label", "level_key", "dataset_label", "metric_label"]
benchmark_label_keys = ["top_level_class_label", "task_label", "level_key", "dataset_label"]
benchmark_keys = ["top_level_class"] + benchmark_label_keys

sota = all.copy(deep=True)

# Flag rows of metric series that hold more than 3 results, then promote the
# flag to the whole benchmark: True as soon as any of its metrics qualifies.
sota["# results > 3"] = True
idxe = all.groupby(metric_keys).filter(lambda x: len(x) <= 3).index
sota.loc[list(idxe), "# results > 3"] = False
idxe = sota.groupby(benchmark_label_keys)["# results > 3"].filter(lambda x: x.any()).index
sota.loc[list(idxe), "# results > 3"] = True

# Optional: restrict the analysis to benchmarks with more than 3 results.
# sota = sota[sota["# results > 3"] == True]

# Best value of every metric in every year.
sota = sota.groupby(benchmark_keys + ["metric_label", "# results > 3", "year"])['result'].max().reset_index()
# Per metric: did the year's best value beat all earlier years?
sota = sota.groupby(benchmark_keys + ["metric_label", "# results > 3"])[["year", "result"]].apply(is_sota).reset_index()
# Per benchmark and year: SOTA if at least one metric set a new best.
sota = sota.groupby(benchmark_keys + ["# results > 3", "year"])["result"].any().reset_index()

# The current year is incomplete and therefore left out.
sota = sota[sota["year"] < YEAR_NOW]


# Group by the plain column name: grouping by a one-element list yields
# 1-tuples as keys on pandas >= 2, which breaks the string handling of `tlc`.
for tlc, grp in sota.groupby("top_level_class_label"):
    # Combined legend label, e.g. "not SOTA and # results > 3".
    sota_part = np.where(grp["result"], "SOTA", "not SOTA")
    size_part = np.where(grp["# results > 3"], "# results > 3", "not # results > 3")
    labels = [f"{s} and {n}" for s, n in zip(sota_part, size_part)]

    # group = pd.read_csv(f"trajectory_grouping_{grp.iloc[0]['top_level_class'].replace('https://identifiers.org/ito:', '')}.csv")
    # assign() builds a new frame instead of writing into the group slice.
    grp = grp.assign(**{"Is SOTA and bench > 3 results": labels, "year": grp["year"].astype(str)})
    grp = grp.sort_values(by="Is SOTA and bench > 3 results", ascending=False)

    # A benchmark with several superclasses appears once per level key;
    # dropping that column lets drop_duplicates() collapse the repeats.
    grp = grp.loc[:, grp.columns != "level_key"].drop_duplicates()

    fig = px.histogram(grp, x="year", title=f'SOTA: {tlc}', color="Is SOTA and bench > 3 results").update_xaxes(categoryorder='category ascending')
    fig.write_image(f"artefacts/{tlc.replace(' ', '_').lower()}_sota.png")
    fig.show()

# Composite charts

In [14]:
#@title Stacked chart
#@markdown Stacked chart of the above charts

# Numeric event codes shared by all composite charts:
#   0 = new benchmark, 1 = new SOTA, 2 = new entries but no SOTA, 3 = disbanded.
new["result"] = 0
disbanded["result"] = 3
# Recode the boolean SOTA flag in one step. Writing ints into the bool column
# through .loc is a dtype-incompatible setitem (deprecated in recent pandas).
# Comparing with 1 matches both True and an already recoded 1, so re-running
# this cell leaves the codes unchanged.
sota["result"] = np.where(sota["result"] == 1, 1, 2)
all_res = pd.concat([new, sota, disbanded])

# Group by the plain column name: grouping by a one-element list yields
# 1-tuples as keys on pandas >= 2, which breaks the string handling of `tlc`.
for tlc, grp in all_res.groupby("top_level_class_label"):
    # assign() builds a new frame instead of writing into the group slice.
    grp = grp.assign(
        year=grp["year"].astype(str),
        result_label=grp["result"].map({0: 'New benchmarks', 1: 'New sota', 2: 'New entries but not sota', 3: 'Disbanded benchmarks'}),
    )

    # A benchmark with several superclasses appears once per level key;
    # dropping that column lets drop_duplicates() collapse the repeats.
    grp = grp.loc[:, grp.columns != "level_key"].drop_duplicates()

    # Colour encodes the event type, the hatch pattern the "# results > 3" flag.
    fig = px.histogram(grp, x="year", color="result_label", pattern_shape="# results > 3", pattern_shape_map={
             True: "", False: "x",
             }, title=f"{tlc}").update_xaxes(categoryorder='category ascending')
    fig.write_image(f"artefacts/{tlc.replace(' ', '_').lower()}_composite.png")
    fig.show()

In [15]:
#@title # Active benchmarks
#@markdown Cumulative sum of new - disbanded

# piktogram = all_res[all_res["top_level_class_label"] == "Natural language processing"]
for tlc, piktogram in all_res.groupby("top_level_class_label"):

    # A benchmark with several superclasses appears once per level key;
    # without that column the repeats collapse into a single row.
    piktogram = piktogram.loc[:, piktogram.columns != "level_key"].drop_duplicates()

    # Number of events per (year, result code).
    piktogram = (
        piktogram.assign(count=1)
        .groupby(["year", "result"])["count"]
        .sum()
        .reset_index()
    )

    # Yearly counts per event type (0 = new, 1 = new SOTA, 3 = disbanded).
    type0 = piktogram.loc[piktogram["result"] == 0, ["year", "count"]].sort_values(by="year").set_index("year")
    type1 = piktogram.loc[piktogram["result"] == 1, ["year", "count"]].sort_values(by="year").set_index("year")
    type3 = piktogram.loc[piktogram["result"] == 3, ["year", "count"]].sort_values(by="year").set_index("year")

    # Active = running total of (new benchmarks, moved one year later) minus
    # (disbanded benchmarks), from the first creation year up to YEAR_NOW - 1.
    year_span = range(type0.index.min(), YEAR_NOW)
    created = type0.reindex(year_span).shift().fillna(0)
    active = created.subtract(type3, fill_value=0).cumsum()
    active = active.reset_index()
    sota__ = type1.reset_index()

    fig = go.Figure()

    # Invisible zero line that the area trace below is filled towards.
    fig.add_trace(go.Scatter(
        x=[active["year"].min(), active["year"].max()],
        y=np.full(2, 0),
        mode='lines',
        line_color='white',
        showlegend=False,
    ))

    fig.add_trace(go.Scatter(
        x=active["year"],
        y=active["count"],
        mode='lines',
        fill='tonexty',
        line_shape="spline",
    ))

    """
    fig.add_trace(go.Scatter(
                            x=[sota__["year"].min(), sota__["year"].max()], y=np.full(2, 0),
                            mode='lines',
                            line_color='white', showlegend=False))

    fig.add_trace(go.Scatter(x=sota__["year"], y=sota__["count"],
                    mode='lines', fill='tonexty', line_shape="spline"))
    """

    #fig.update_traces(orientation='h', side='positive', width=3, points=False)
    fig.update_layout(
        xaxis_showgrid=False,
        xaxis_zeroline=False,
        showlegend=False,
        title=f"{tlc}: # active benchmarks",
    )
    fig.write_image(f"artefacts/{tlc.replace(' ', '_').lower()}_active.png")
    fig.show()

In [20]:

#@title Piktogram paper

import plotly.graph_objects as go
from plotly.colors import n_colors
import numpy as np

def compose(df, year_now=None):
    """Return, per year, the active benchmarks that did not report a SOTA.

    Parameters
    ----------
    df : pandas.DataFrame
        Columns ``result``, ``year`` and ``count``; ``result`` holds the
        event code (0 = new benchmark, 1 = new SOTA, 3 = disbanded) and
        ``count`` the number of such events in that year.
    year_now : int, optional
        Exclusive upper bound of the year range. Defaults to the
        module-level ``YEAR_NOW``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``year`` with an integer ``count`` column: the number of
        active benchmarks minus the number of benchmarks reporting a SOTA.
    """
    if year_now is None:
        year_now = YEAR_NOW

    def yearly_counts(code):
        # Counts of one event type, indexed by year in ascending order.
        rows = df[df["result"] == code]
        return rows[["year", "count"]].sort_values(by="year").set_index("year")

    created = yearly_counts(0)
    with_sota = yearly_counts(1)
    ended = yearly_counts(3)

    # Active = running total of (new benchmarks, moved one year later) minus
    # (disbanded benchmarks), from the first creation year to year_now - 1.
    year_span = range(created.index.min(), year_now)
    active = created.reindex(year_span).shift().fillna(0).subtract(ended, fill_value=0).cumsum()

    return active.subtract(with_sota, fill_value=0).astype(int)

for tlc, piktogram in all_res.groupby("top_level_class_label"):
    # Keep benchmarks flagged "# results > 3" that have a level key. The
    # explicit copy makes the column assignment below write to an independent
    # frame rather than to a slice of all_res.
    keep = piktogram["# results > 3"].eq(True) & piktogram["level_key"].ne("")
    piktogram = piktogram[keep].copy()

    # Rows are aggregated per level key, which takes the place of the task label.
    piktogram["task_label"] = piktogram["level_key"]
    piktogram = piktogram.dropna()

    # Debug aid: to inspect a single task, filter here, e.g.
    # piktogram = piktogram[piktogram["task_label"] == "Computer code processing: Code Generation"]

    piktogram = piktogram.sort_values(by=["task_label", "year", "result"])

    # Number of events per task, year and event code.
    piktogram["count"] = 1
    piktogram = piktogram.groupby(["task_label", "year", "result"])["count"].sum().reset_index()

    # Drop tasks with a single data point or never more than one event per year.
    piktogram = piktogram.groupby("task_label").filter(lambda x: len(x) > 1 and x["count"].max() > 1)
    if piktogram.empty:
        # Nothing to draw for this class; the axis ranges below need data.
        continue
    piktogram = piktogram.assign(
        result_label=piktogram["result"].map({0: 'New benchmarks', 1: 'New sota', 2: 'New entries but not sota', 3: 'Disbanded benchmarks'})
    )

    type0 = piktogram[piktogram["result"] == 0]
    type1 = piktogram[piktogram["result"] == 1]
    type3 = piktogram[piktogram["result"] == 3]
    # Columns are selected with a list: selecting with a bare tuple of names
    # is deprecated and rejected by pandas >= 2.
    type4 = piktogram.groupby("task_label")[["result", "year", "count"]].apply(compose).reset_index()

    # Only years from YEAR_START on are drawn.
    type0, type1, type3, type4 = (
        frame[frame["year"] >= YEAR_START] for frame in (type0, type1, type3, type4)
    )

    fig = go.Figure()

    # Factor applied to the counts to obtain marker areas.
    scale = 25

    # One marker layer per event type, shifted horizontally within the year
    # so that the four symbols of a year sit side by side.
    marker_layers = [
        (type0, -0.3, '# Benchmarks new', "circle"),
        (type1, -0.1, '# Benchmarks reporting SOTA', "diamond"),
        (type4, 0.1, '# Benchmarks reporting no SOTA/no results', "square"),
        (type3, 0.3, '# Benchmarks disbanded', "x"),
    ]
    for layer, x_offset, trace_name, symbol in marker_layers:
        fig.add_trace(go.Scatter(
            x=layer["year"] + x_offset,
            y=layer["task_label"],
            name=trace_name,
            mode="markers",
            marker=dict(
                symbol=symbol,
                size=layer["count"] * scale,
                sizemode="area"
            ),
            hovertext="year: " + layer["year"].astype(str) + ", count: " + layer["count"].astype(str),
            hoverinfo="y+text+name"
        ))

    # Faint bars on the artificial "!!!" category, one per year.
    last_year = piktogram["year"].max()
    fig.add_trace(go.Bar(
        x=list(range(YEAR_START, last_year + 1)),
        y=["!!!" for _ in range(YEAR_START, last_year + 1)],
        showlegend=False,
        hoverinfo="none",
        opacity=0.2
    ))

    # For the paper only NLP with height=1200 was used.
    fig.update_layout(title_text=f"{tlc}", height=1000 if tlc == "Natural language processing" else 5000, legend=dict(
        orientation="h",
        yanchor="bottom",
        y=1.02,
        xanchor="right",
        x=1,
    ),
        width=1920)
    fig.update_xaxes(dtick=1, range=[(YEAR_START) - 0.5, float(last_year) + 0.5])
    # "!!!" and "zzz" are extra categories placed around the task labels;
    # only the real task labels get tick labels.
    fig.update_yaxes(categoryorder='array', categoryarray=sorted(list(piktogram["task_label"]) + ["!!!", "zzz"], reverse=True), tickvals=sorted(list(piktogram["task_label"]), reverse=True), side='right')

    fig.show()
    fig.write_image(f"artefacts/{tlc.replace(' ', '_').lower()}_pikto.png", scale=2)
    fig.write_html(f"artefacts/{tlc.replace(' ', '_').lower()}_pikto.html", include_plotlyjs="cdn")


Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.



## Further charts

In [17]:
#@title num active per task NOT NORMALIZED: variant 1 (tasks equally spaced)
import plotly.graph_objects as go
from plotly.colors import n_colors
import numpy as np
np.random.seed(1)

def min_max(x):
    """Return a copy of *x* indexed by ``year``.

    Despite the name, no scaling is applied: the ``count`` column is carried
    over unchanged and the input frame is not modified.
    """
    # assign() works on a copy; re-assigning "count" to itself is the place
    # where a normalisation would go.
    return x.assign(count=x["count"]).set_index(["year"])

def cumsum(x):
    """Return *x* sorted and indexed by ``year`` with ``count`` accumulated.

    The input frame is left untouched; the running total of ``count`` is
    taken in ascending year order.
    """
    by_year = x.sort_values(by=["year"])
    running_total = by_year["count"].cumsum()
    return by_year.assign(count=running_total).set_index("year")
    
def compose(df, year_now=None):
    """Return the number of active benchmarks per year.

    Parameters
    ----------
    df : pandas.DataFrame
        Columns ``result``, ``year`` and ``count``; ``result`` holds the
        event code (0 = new benchmark, 3 = disbanded) and ``count`` the
        number of such events in that year. Other codes are ignored.
    year_now : int, optional
        Exclusive upper bound of the year range. Defaults to the
        module-level ``YEAR_NOW``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``year`` with a ``count`` column: the running total of
        new benchmarks (moved one year later) minus disbanded benchmarks.
    """
    if year_now is None:
        year_now = YEAR_NOW

    def yearly_counts(code):
        # Counts of one event type, indexed by year in ascending order.
        rows = df[df["result"] == code]
        return rows[["year", "count"]].sort_values(by="year").set_index("year")

    created = yearly_counts(0)
    ended = yearly_counts(3)

    year_span = range(created.index.min(), year_now)
    return created.reindex(year_span).shift().fillna(0).subtract(ended, fill_value=0).cumsum()



for tlc, piktogram in all_res.groupby("top_level_class_label"):

    # Keep benchmarks flagged "# results > 3". The explicit copy makes the
    # column assignment below write to an independent frame rather than to a
    # slice of all_res.
    piktogram = piktogram[piktogram["# results > 3"].eq(True)].copy()

    # Prefix every task with its level key.
    piktogram["task_label"] = piktogram["level_key"] + ": " + piktogram["task_label"]
    piktogram = piktogram.dropna()
    if piktogram.empty:
        # Nothing to aggregate for this class.
        continue

    piktogram = piktogram.sort_values(by=["task_label", "year", "result"])

    # Number of events per task, year and event code.
    piktogram["count"] = 1
    piktogram = piktogram.groupby(["task_label", "year", "result"])["count"].sum().reset_index()

    # piktogram=piktogram.groupby(["task_label", "result"])[["year", "count"]].apply(cumsum).reset_index()

    # Columns are selected with a list: selecting with a bare tuple of names
    # is deprecated and rejected by pandas >= 2.
    piktogram = piktogram.groupby("task_label")[["result", "year", "count"]].apply(compose).reset_index()

    piktogram = piktogram.groupby("task_label")[["year", "count"]].apply(min_max).reset_index()

    # Drop tasks with a single data point or never more than one active benchmark.
    piktogram = piktogram.groupby("task_label").filter(lambda x: len(x) > 1 and x["count"].max() > 1)
    if piktogram.empty:
        # No task left to draw for this class.
        continue

    # Grouping by the plain column name keeps the group keys plain strings;
    # a one-element list yields 1-tuples on pandas >= 2, which would end up
    # in the trace names and annotations.
    data = list(piktogram.groupby("task_label"))

    fig = go.Figure()

    # Vertical distance between the baselines of two neighbouring tasks.
    scale = 5

    for index, (task, task_rows) in enumerate(data):
        # Tasks are stacked top-down: the first task gets the highest baseline.
        baseline = (len(data) - index) * scale

        # Invisible baseline that the area trace below is filled towards.
        fig.add_trace(go.Scatter(
            x=[task_rows["year"].min(), task_rows["year"].max()], y=np.full(2, baseline),
            mode='lines',
            line_color='white', showlegend=False))

        fig.add_trace(go.Scatter(
            x=task_rows["year"], y=task_rows["count"] + baseline + 0.4,
            mode='lines',
            name=task,
            fill='tonexty',
            line_shape="spline",
            hovertext=task_rows["count"],
            hoverinfo="x+text+name"
        ))

        # White line over the full year range at the baseline.
        fig.add_trace(go.Scatter(
            x=[YEAR_START, YEAR_NOW], y=np.full(2, baseline),
            mode='lines',
            line_color='white', showlegend=False))

        # Task name as a text annotation at the left edge of its band.
        fig.add_annotation(
            x=YEAR_START,
            y=baseline,
            text=f'{task}',
            showarrow=False,
            yshift=10)

    #fig.update_traces(orientation='h', side='positive', width=3, points=False)
    # yaxis showticklabels=False hides the y axis tick labels.
    fig.update_layout(xaxis_showgrid=False, xaxis_zeroline=False, height=50*len(piktogram["task_label"].unique()), title=f"{tlc}", showlegend=False, yaxis=dict(showticklabels=False))
    fig.show()
    fig.write_html(f"artefacts/{tlc.replace(' ', '_').lower()}_task_active.html", include_plotlyjs="cdn")
    fig.write_image(f"artefacts/{tlc.replace(' ', '_').lower()}_task_active.png", scale=2)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.


Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.



In [19]:
#@title Comparison number sota <-> active

import plotly.graph_objects as go
from plotly.colors import n_colors
import numpy as np
np.random.seed(1)
    
for tlc, piktogram in all_res.groupby("top_level_class_label"):

  # piktogram = piktogram[piktogram["# results > 3"] == True]

  """
  grouping = pd.read_csv("trajectory_grouping_ITO_00141.csv")
  grouping.rename(columns={"Class_Label": "task_label"}, inplace=True)

  piktogram = piktogram.merge(grouping, on="task_label", how="left")
  piktogram["task_label"] = piktogram["Suggested_label"]+": "+ piktogram["task_label"]
  piktogram = piktogram.dropna()
  """

  # A benchmark with several superclasses appears once per level key;
  # without that column the repeats collapse into a single row.
  piktogram = (
      piktogram.loc[:, piktogram.columns != "level_key"]
      .drop_duplicates()
      .sort_values(by=["task_label", "year", "result"])
  )
  piktogram = piktogram.assign(
      result_label=piktogram["result"].map({0: 'New benchmarks', 1: 'New sota', 2: 'New entries but not sota', 3: 'Disbanded benchmarks'}),
      count=1,
  )

  # Number of events per (year, result code) over the whole class.
  piktogram = piktogram.groupby(["year", "result"])["count"].sum().reset_index()

  # piktogram=piktogram.groupby(["task_label", "result"])["year", "count"].apply(cumsum).reset_index()

  # Yearly counts per event type (0 = new, 1 = new SOTA, 3 = disbanded).
  type0 = piktogram.loc[piktogram["result"] == 0, ["year", "count"]].sort_values(by="year").set_index("year")
  type1 = piktogram.loc[piktogram["result"] == 1, ["year", "count"]].sort_values(by="year").set_index("year")
  type3 = piktogram.loc[piktogram["result"] == 3, ["year", "count"]].sort_values(by="year").set_index("year")

  # Active = running total of (new benchmarks, moved one year later) minus
  # (disbanded benchmarks), from the first creation year up to YEAR_NOW - 1.
  year_span = range(type0.index.min(), YEAR_NOW)
  created = type0.reindex(year_span).shift().fillna(0)
  active = created.subtract(type3, fill_value=0).cumsum()

  # active_norm = min_max(active.reset_index()).reset_index()
  active_norm = active.reset_index()

  # type1 = type1.reindex(range(active.index.min(),active.index.max())).interpolate()
  # sota_prop = type1.divide(active, fill_value=0).reset_index()
  sota_prop = type1.reset_index()

  """
  print(type0.cumsum())
  print(type3.cumsum())
  print(active)
  print(type1)
  print(sota_prop)
  """

  fig = go.Figure()

  # Each area trace is preceded by an invisible zero line it is filled towards.
  fig.add_trace(go.Scatter(
      x=[sota_prop["year"].min(), sota_prop["year"].max()],
      y=np.full(2, 0),
      mode='lines',
      line_color='white',
      showlegend=False,
  ))
  fig.add_trace(go.Scatter(
      x=sota_prop["year"],
      y=sota_prop["count"],
      mode='lines',
      fill='tonexty',
      line_shape="spline",
      name="#Benchmark reporting SOTA",
  ))

  fig.add_trace(go.Scatter(
      x=[active_norm["year"].min(), active_norm["year"].max()],
      y=np.full(2, 0),
      mode='lines',
      line_color='white',
      showlegend=False,
  ))
  fig.add_trace(go.Scatter(
      x=active_norm["year"],
      y=active_norm["count"],
      mode='lines',
      fill='tonexty',
      line_shape="spline",
      name="#Benchmark active",
  ))

  #fig.update_traces(orientation='h', side='positive', width=3, points=False)
  # yaxis=dict(showticklabels=False)
  fig.update_layout(
      xaxis_showgrid=False,
      xaxis_zeroline=False,
      showlegend=True,
      title=f"{tlc}",
      width=1200,
  )
  fig.update_xaxes(dtick=1.0)
  fig.show()
  fig.write_html(f"artefacts/{tlc.replace(' ', '_').lower()}_prop_sota_active.html", include_plotlyjs="cdn")
  fig.write_image(f"artefacts/{tlc.replace(' ', '_').lower()}_prop_sota_active.png", scale=2)