In [136]:
from huggingface_hub import HfApi, ModelFilter
import pandas as pd
import math
import numpy as np
from tqdm.notebook import trange, tqdm
import plotly.express as px
import plotly.io as pio


In [137]:
class HuggingCrawler():
    """Collect the CO2 emission values reported on Hugging Face Hub model cards
    and draw them as a strip plot grouped by task family.

    Typical use::

        hc = HuggingCrawler()
        emissions = hc.scrape()      # DataFrame with columns 'Emissions' and 'Task'
        hc.plot('some/output/dir')   # writes HuggingEmissions.png and shows the figure
    """

    def __init__(self):
        # Maps a task family (used as the 'Task' label in the result) to the
        # individual task tags that are queried one by one in `scrape`.
        self.task_dct = {
            'text': [
                "text-classification",
                'translation',
                'conversational',
                'fill-mask',
                'sentence-similarity',
                'question-answering',
                'summarization',
                'text-generation',
                'zero-shot-classification',
                'text2text-generation',
                'table-question-answering',
            ],
            'vision': [
                'image-classification',
                'image-segmentation',
                'depth-estimation',
                'image-to-image',
                'object-detection',
                'video-classification',
                'unconditional-image-generation',
            ],
            'audio': [
                'automatic-speech-recognition',
                'audio-classification',
                'text-to-speech',
                'audio-to-audio',
                'voice-activity-detection',
            ],
            'multimodal': [
                'feature-extraction',
                'text-to-image',
                'visual-question-answering',
                'image-to-text',
                'document-question-answering',
            ],
            'tabular': ['tabular-classification', 'tabular-regression'],
            'reinforcement-learning': ['reinforcement-learning', 'robotics'],
        }

    @staticmethod
    def _extract_emission(card_data):
        """Return the raw ``co2_eq_emissions`` value of a model card, or ``None``.

        The value is stored either directly under ``'co2_eq_emissions'`` or as
        a dict with an ``'emissions'`` entry; both layouts are handled.
        ``None`` is returned when the card is missing, is not subscriptable,
        or does not carry the key, so one malformed card cannot abort a crawl.
        """
        try:
            emission = card_data['co2_eq_emissions']
        except (KeyError, TypeError, IndexError):
            return None
        if isinstance(emission, dict):
            return emission.get('emissions')
        return emission

    def scrape(self):
        """Query the Hub for every task in ``self.task_dct`` and collect emissions.

        For each task, ``HfApi.list_models`` is called with a ``ModelFilter``
        for that task, ``emissions_thresholds=(None, math.inf)`` and
        ``cardData=True``. Models whose card yields no emission value are
        skipped instead of raising.

        Side effects: (re)sets ``self.emission_list``, ``self.task_type_list``
        and ``self.emissions``.

        Returns:
            pandas.DataFrame with one row per collected model and the columns
            ``'Emissions'`` (raw value from the card) and ``'Task'`` (task
            family key of ``self.task_dct``).
        """
        api = HfApi()
        self.emission_list = list()
        self.task_type_list = list()
        for task_type, task_list in tqdm(self.task_dct.items(), total=len(self.task_dct)):
            for task in tqdm(task_list, total=len(task_list), leave=True):
                filt = ModelFilter(task=task)
                model_list = api.list_models(filter=filt, emissions_thresholds=(None, math.inf), cardData=True)
                for model in model_list:
                    emission = self._extract_emission(getattr(model, 'cardData', None))
                    if emission is None:
                        # No usable value on this card; nothing to plot for it.
                        continue
                    self.emission_list.append(emission)
                    self.task_type_list.append(task_type)
        self.emissions = pd.DataFrame(dict(Emissions=self.emission_list, Task=self.task_type_list))
        return self.emissions

    def plot(self, path):
        """Draw the scraped emissions as a log-scaled strip plot and save it.

        The figure is written to ``{path}/HuggingEmissions.png`` through
        ``plotly.io.write_image`` and then displayed with ``fig.show()``.

        Args:
            path: Directory (without trailing slash) that receives the PNG.

        Raises:
            RuntimeError: If ``scrape()`` has not been run on this instance,
                so there is no ``self.emissions`` to plot.
        """
        emissions = getattr(self, 'emissions', None)
        if emissions is None:
            raise RuntimeError("No emission data available: call scrape() before plot().")

        # Reference values drawn as dashed vertical lines; per the annotation
        # texts below: one hour of 4K streaming (610 g CO2) and 1 km by car
        # ("PKW", 200 g CO2).
        stunde_4K = 610
        pkw_ab2015_1km = 200
        colors = ["#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#E69F00", "#56B4E9", "#009E73", "#F0E442"]
        # Plot the data held by this instance rather than a notebook-global frame.
        fig = px.strip(emissions, x='Emissions', y='Task', color='Task', log_x=True, template='plotly_white', color_discrete_sequence=colors)
        fig.update_layout(xaxis=dict(tickformat=".1r"), xaxis_title='Emission (gCO2)', showlegend=False)
        fig.add_vline(x=stunde_4K, line_color="gray", line_width=1, opacity=.6, line_dash="dash")
        fig.add_vline(x=pkw_ab2015_1km, line_color="gray", line_width=1, opacity=.6, line_dash="dash")
        # NOTE(review): on a log axis Plotly appears to expect annotation
        # coordinates as log10 values, so x=2.5 / x=3 would sit at roughly
        # 316 g and 1000 g, i.e. just right of their reference lines - confirm.
        fig.add_annotation(x=2.5, y=2,
            text="1km PKW (200g CO2)",
            showarrow=False,
            textangle=90,
            align="left",
            font=dict(size=12, color="black"))
        fig.add_annotation(x=3, y=2,
            text="1h 4K-Streaming (610g CO2)",
            showarrow=False,
            textangle=90,
            align="left",
            font=dict(size=12, color="black"))
        pio.write_image(fig, f'{path}/HuggingEmissions.png', scale=10, width=1080, height=600)
        fig.show()

In [139]:
# Driver cell: build the crawler, scrape the Hub, then render and save the plot.
hc = HuggingCrawler()
# scrape() returns the collected emissions as a DataFrame (columns 'Emissions', 'Task').
df = hc.scrape()
# NOTE(review): absolute, machine-specific output directory - this cell will not
# run elsewhere unchanged; consider a configurable or relative path.
hc.plot('/Users/saminenno/Desktop/Sustainability/Git/Visuals')

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]