## **INSTALL REQUIRED PACKAGES**

In [1]:
# Install essential libraries for working with pretrained models and various machine learning tasks
!pip install -q peft accelerate bitsandbytes safetensors sentencepiece streamlit chromadb langchain sentence-transformers gradio pypdf langchain-community

# Upgrade the 'transformers' library to the latest version for access to new features and improvements
!pip install -U transformers

# Install the 'bitsandbytes' library from the official PyPI repository
!pip install -i https://pypi.org/simple/ bitsandbytes

# Install the 'accelerate' library for easy multi-GPU and multi-node training setups
!pip install accelerate

# Install 'weasyprint' for generating PDF documents from HTML and CSS
!pip install weasyprint


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m251.6/251.6 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m119.8/119.8 MB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.6/8.6 MB[0m [31m28.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m581.4/581.4 kB[0m [31m19.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m987.7/987.7 kB[0m [31m36.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m227.1/227.1 kB[0m [31m15.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.4/12.4 MB[0m [31m54.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m295.7/295.7 kB[0m [31m12.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━

## IMPORT DEPENDENCIES

In [2]:
# fixing unicode error in google colab
import locale
locale.getpreferredencoding = lambda: "UTF-8"

In [3]:
# Import the PyTorch library for tensor computations and model operations
import torch

# Import necessary modules from the transformers library for working with pre-trained language models
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline

# Import the os module for interacting with the operating system (e.g., for file paths)
import os

# Import the Gradio library for creating user interfaces for machine learning models
import gradio as gr

# Import the Google Colab drive module to mount Google Drive and access files from there
from google.colab import drive

# Import the chromadb library for working with vector databases and document indexing
import chromadb

# Import the HuggingFacePipeline class from langchain for integrating Hugging Face models into the LangChain framework
from langchain.llms import HuggingFacePipeline

# Import the TextLoader class from langchain for loading text documents
from langchain.document_loaders import TextLoader

# Import the RecursiveCharacterTextSplitter class from langchain for splitting text into chunks recursively
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Import the HuggingFaceEmbeddings class from langchain for converting text into embeddings using Hugging Face models
from langchain.embeddings import HuggingFaceEmbeddings

# Import the Chroma class from langchain for storing and retrieving vector embeddings
from langchain.vectorstores import Chroma

# Import the HuggingFacePipeline class again from langchain, which may be used for different purposes in the script
from langchain import HuggingFacePipeline

# Import the PyPDFDirectoryLoader class from langchain for loading text from PDF files in a directory
from langchain.document_loaders import PyPDFDirectoryLoader

# Import the ConversationalRetrievalChain class from langchain for creating a conversational AI model with retrieval capabilities
from langchain.chains import ConversationalRetrievalChain

# Import the ConversationBufferMemory class from langchain for managing conversation state and context
from langchain.memory import ConversationBufferMemory


## CHOOSE THE LLM MODEL (FROM **HUGGINGFACE**)

In [4]:
#model_name = "swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA"
#model_name="microsoft/Phi-3-medium-4k-instruct"
model_name="HuggingFaceH4/zephyr-7b-beta"

### IMPORT THE QUANTIZED MODEL IN ORDER TO REDUCE THE RESOURCES CONSUMED

In [5]:
# function for loading 4-bit quantized model
def load_quantized_model(model_name: str):
    """
    :param model_name: Name or path of the model to be loaded.
    :return: Loaded quantized model.
    """
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True, # Enable loading the model with 4-bit quantization
        bnb_4bit_use_double_quant=True, # Use double quantization to improve performance
        bnb_4bit_quant_type="nf4", # Set the quantization type to "nf4" for efficient representation
        bnb_4bit_compute_dtype=torch.bfloat16
    )
# Load the model with the specified name and apply the quantization configuration
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.bfloat16,    # Set the data type for the model parameters to bfloat16
        quantization_config=bnb_config # Apply the quantization configuration to the model
    )
    return model # Return the loaded quantized model

In [6]:
# function for initializing tokenizer
def initialize_tokenizer(model_name: str):
    """
    Initialize the tokenizer with the specified model_name.

    :param model_name: Name or path of the model for tokenizer initialization.
    :return: Initialized tokenizer.
    """

    tokenizer = AutoTokenizer.from_pretrained(model_name)  # Load the pre-trained tokenizer using the provided model name or path.
    tokenizer.bos_token_id = 1  # Set beginning of sentence token id
    return tokenizer # Return the configured tokenizer for use in further text processing or model interaction tasks.

### CREATE THE COLAB SECRETE NAMED *'HF_TOKEN'* WITH YOUR HUGGING FACE API KEY

In [7]:
from google.colab import userdata
userdata.get('HF_TOKEN') #This token is often used for authentication with HUGGING

'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'

In [8]:
### to load the quantized model we need at least the T4 GPU!

# load model
model = load_quantized_model(model_name)

# initialize tokenizer
tokenizer = initialize_tokenizer(model_name)

# specify stop token ids
stop_token_ids = [0]

config.json:   0%|          | 0.00/638 [00:00<?, ?B/s]

`low_cpu_mem_usage` was None, now set to True since model is quantized.


model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/8 [00:00<?, ?it/s]

model-00001-of-00008.safetensors:   0%|          | 0.00/1.89G [00:00<?, ?B/s]

model-00002-of-00008.safetensors:   0%|          | 0.00/1.95G [00:00<?, ?B/s]

model-00003-of-00008.safetensors:   0%|          | 0.00/1.98G [00:00<?, ?B/s]

model-00004-of-00008.safetensors:   0%|          | 0.00/1.95G [00:00<?, ?B/s]

model-00005-of-00008.safetensors:   0%|          | 0.00/1.98G [00:00<?, ?B/s]

model-00006-of-00008.safetensors:   0%|          | 0.00/1.95G [00:00<?, ?B/s]

model-00007-of-00008.safetensors:   0%|          | 0.00/1.98G [00:00<?, ?B/s]

model-00008-of-00008.safetensors:   0%|          | 0.00/816M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.43k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/42.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/168 [00:00<?, ?B/s]

## UPLOAD YOUR FILES IN GOOGLE DRIVE INSIDE THE FOLDER *'documents_colab'*

In [9]:
import os
from urllib.request import urlretrieve
import numpy as np
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain_community.llms import HuggingFacePipeline
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

In [10]:
# mount google drive and specify folder path
drive.mount('/content/drive')
folder_path = '/content/drive/MyDrive/documents_colab/'

Mounted at /content/drive


### SINCE THE PDFs CAN BE STRUCTURED IN SUB-FOLDERS, WE NEED TO MOVE ALL OF THEM IN A SINGLE FOLDER (*'/PDFs'*)

In [11]:
import os
import shutil

source_dir = folder_path
destination_dir = '/content/Geografia'

# if destination_dir does not exist, then create it
if not os.path.exists(destination_dir):
    os.makedirs(destination_dir)

# move the PDFs
def move_pdfs(source, destination):
    for root, dirs, files in os.walk(source):
        for file in files:
            if file.lower().endswith('.pdf'):
                source_file = os.path.join(root, file)
                destination_file = os.path.join(destination, file)
                shutil.copy(source_file, destination_file)
                print(f'Spostato: {source_file} -> {destination_file}')

# run the function
move_pdfs(source_dir, destination_dir)
folder_path = destination_dir

Spostato: /content/drive/MyDrive/documents_colab/Geografia/Zanichelli_Dinucci_Essenziale_F5_Italia.pdf -> /content/Geografia/Zanichelli_Dinucci_Essenziale_F5_Italia.pdf
Spostato: /content/drive/MyDrive/documents_colab/Geografia/italia-territorio e popolazione_20120422.pdf -> /content/Geografia/italia-territorio e popolazione_20120422.pdf
Spostato: /content/drive/MyDrive/documents_colab/Geografia/2-1-continenti-ed-oceani-converted.pdf -> /content/Geografia/2-1-continenti-ed-oceani-converted.pdf
Spostato: /content/drive/MyDrive/documents_colab/Geografia/Geografia_generale_dei_continenti_Oceani.pdf -> /content/Geografia/Geografia_generale_dei_continenti_Oceani.pdf
Spostato: /content/drive/MyDrive/documents_colab/Geografia/europa_geo_parte1.pdf -> /content/Geografia/europa_geo_parte1.pdf
Spostato: /content/drive/MyDrive/documents_colab/Geografia/europa_geo_parte2.pdf -> /content/Geografia/europa_geo_parte2.pdf
Spostato: /content/drive/MyDrive/documents_colab/Geografia/leonetti-geo1.pdf -> 

## CONVERT WEB PAGES TO PDF FILES (**FILTERING THE TEXT**)

In [None]:
import os
import requests
from bs4 import BeautifulSoup
from weasyprint import HTML

#List of web pages
pagine_web = [
    "https://it.wikipedia.org/wiki/Italia",
    "https://www.esploriamo.com/regione-campania/campania-storia",
    "https://www.mazzoneturismo.it/campania-una-regione-ricca-di-bellezze-naturali-storia-e-cultura/",
    "https://it.wikipedia.org/wiki/Lago",
    "https://it.wikipedia.org/wiki/Fiume",
    "https://it.wikipedia.org/wiki/Terra",
    "https://it.wikipedia.org/wiki/Napoli",
    "https://it.wikipedia.org/wiki/Organizzazione_delle_Nazioni_Unite",
    "https://it.wikipedia.org/wiki/Campania",
    "https://it.wikipedia.org/wiki/Vesuvio",
    "https://it.wikipedia.org/wiki/Mar_Mediterraneo",
    "https://it.wikipedia.org/wiki/Diritti_umani"
]

# Destination folder path
cartella_destinazione = "/content/Geografia"

# Function to remove images, external links and references, and to clean text
def pulisci_html(html_content):
    soup = BeautifulSoup(html_content, 'html.parser')

    # Removes images
    for img in soup.find_all('img'):
        img.decompose()

    # Removes external links keeping only internal links to Wikipedia
    for a in soup.find_all('a', href=True):
        if a['href'].startswith('http') and 'wikipedia.org' not in a['href']:
            a.unwrap()  # Remove only the <a> tag while keeping the internal text
        else:
            a.attrs = {key: value for key, value in a.attrs.items() if key != 'href'}  # Removes only the href attribute



    # # Remove references like "[20]"
    for ref in soup.find_all('sup', class_='reference'):
        ref.decompose()

    # Removes references like "[20]" in the text
    for text in soup.find_all(text=True):
        if text.strip().startswith('[') and text.strip().endswith(']'):
            text.replace_with('')

    return str(soup)

# Function to convert a web page to PDF
def converti_pagina_web_in_pdf(url, cartella_destinazione):
    try:
        response = requests.get(url)
        response.raise_for_status()  # Check if the request was successful
        html_content = response.text
        html_pulito = pulisci_html(html_content)
        nome_file = url.split("/")[-1] + ".pdf"
        percorso_file = os.path.join(cartella_destinazione, nome_file)
        HTML(string=html_pulito).write_pdf(percorso_file)
        print(f"PDF creato: {percorso_file}")
    except requests.RequestException as e:
        print(f"Errore nella richiesta per {url}: {e}")

# Creating the destination folder if it does not exist
os.makedirs(cartella_destinazione, exist_ok=True)

# Converting web pages to PDF
for pagina_web in pagine_web:
    converti_pagina_web_in_pdf(pagina_web, cartella_destinazione)

## LOAD, SPLIT -> RETRIEVER

In [12]:
# load pdf files
loader = PyPDFDirectoryLoader(folder_path)
documents = loader.load()

### CHOOSE THE '*chunk_size*' AND '*chunk_overlap*'

In [13]:
# The `chunk_size` parameter specifies the maximum number of characters each chunk should contain.
# The `chunk_overlap` parameter specifies how many characters should overlap between consecutive chunks.

# Initialize the RecursiveCharacterTextSplitter to divide the documents into smaller chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=200) #Chage the chunk_size and chunk_overlap as needed


# Split the documents into smaller or larger chunks based on the defined `chunk_size` and `chunk_overlap` parameters.
# The `split_documents` method processes the documents and creates a list of text chunks.
all_splits = text_splitter.split_documents(documents)

# Check the first few splits to ensure coherence
for i, split in enumerate(all_splits[:3]):
    print(f"Chunk {i+1}:\n{split}\n{'-'*20}")  # Display the first three chunks to check their content and coherence.



Chunk 1:
page_content='Modulo 2 . Il Globo, l’Europa, l ’Italia                                                                                               Cap. 1  - Continenti e  oceani  
V. Sciacca, Sognare sugli atlanti  1 
I continenti e gli oceani  
La superficie della Terra è di circa 510 milioni di Kmq.  Per la maggior parte essa è occupata da acque 
(idrosfera): mari e oceani ricoprono infatti circa il 70% del nostro pianeta; il restante 30% è occupato 
da terre emerse (continenti, isole e arcipelaghi)  prevalentemente  situate nell’emisfero boreale .  
I continenti sono sei: America , Europa , Asia , Africa , Oceania , Antartide ; l’Artide  (polo nord) non è 
un con tinente perché è costituita da porzioni di altri continenti e dalla banchisa  (massa di ghiaccio 
galleggiante) polare.  
Gli oceani  sono cinque : Oceano Pacifico , Oceano Atlantico , Oceano Indiano , Oceano A rtico  
(indicato anche come Mar Glaciale A rtico) e Oceano A ntartico . In realtà si tratta di un’unic

## CHOOSE YOUR EMBEDDING MODEL ([what embeddings are?](https://huggingface.co/blog/getting-started-with-embeddings))

In [14]:
# specify embedding model (using huggingface sentence transformer)
embedding_model_name = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
# Define additional keyword arguments for the model. Here, we specify that we want to use a CUDA-enabled GPU for computations.
# 'cuda' indicates that the model will utilize the GPU for faster processing if available.
model_kwargs = {"device": "cuda"}

### CREATE THE RETRIEVER THAT WILL BE USED BY THE LLM ([ChromaDB](https://www.trychroma.com/))

In [15]:
# Initialize the HuggingFaceEmbeddings class with the specified model name and additional keyword arguments.
# This will load the pre-trained model for generating embeddings.
embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name, model_kwargs=model_kwargs)

# Create a Chroma vector database from the document chunks.
# This step involves embedding the document chunks using the previously initialized embedding model
# and storing these embeddings in a Chroma database for efficient retrieval.
vectordb = Chroma.from_documents(documents=all_splits, embedding=embeddings, persist_directory="chroma_db")

# Set up the retriever from the Chroma vector database.
# The retriever will be used to query the vector database for relevant document chunks based on a given query.
retriever = vectordb.as_retriever()

  warn_deprecated(


modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/4.12k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/645 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/471M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/480 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.08M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

## DEFINE THE PIPELINE FOR THE LLM MODEL AND THE CONVERSATION MODEL ([pipelines definition hugging face](https://huggingface.co/docs/transformers/main_classes/pipelines))

In [20]:
# Import necessary classes and functions from the transformers library and langchain
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
from langchain.llms import HuggingFacePipeline

# Define a function to create a text generation pipeline
def create_pipeline(model, tokenizer):
    # Create a text generation pipeline using the provided model and tokenizer
    return pipeline(
        model=model,  # Specify the model to use for text generation
        tokenizer=tokenizer,  # Specify the tokenizer for the model
        return_full_text=False,  # Set to False to ensure langchain receives only the generated text, not the full text of the input
        task='text-generation',  # Specify the task as text generation
        max_new_tokens=400,  # Set the maximum number of tokens to generate in the output
        temperature=0.4,  # Set the temperature for sampling: a lower temperature makes the output more focused and deterministic
        do_sample=True,  # Enable sampling to introduce randomness in text generation, allowing for more varied and creative responses
        top_p=0.9,  # Use nucleus sampling to consider only the most probable words that make up 90% of the cumulative probability mass
        repetition_penalty=1.1
    )

# Create the pipeline by calling the create_pipeline function with the specified model and tokenizer
pipeline = create_pipeline(model, tokenizer)

# Wrap the pipeline in a HuggingFacePipeline object from langchain for integration with LangChain's LLM interface
llm = HuggingFacePipeline(pipeline=pipeline)


In [21]:
# Build a conversational retrieval chain with memory using LangChain
def create_conversation(query: str, chat_history: list) -> tuple:
    try:
        query_input = str("Rispondi in Italiano alla domanda:"+query)
        # Initialize the memory buffer to store the chat history.
        # This memory will hold the conversation context but will not return past messages.
        memory = ConversationBufferMemory(
            memory_key='chat_history',  # Key to access the stored chat history
            return_messages=False      # Whether to return the individual messages or not
        )

        # Create a ConversationalRetrievalChain instance with the given language model (llm),
        # a retriever for fetching relevant information, and the memory object to maintain chat history.
        # The 'get_chat_history' function here simply returns the chat history as it is.
        qa_chain = ConversationalRetrievalChain.from_llm(
            llm=llm,  # The language model used for generating responses
            retriever=retriever,  # The retriever responsible for fetching relevant information
            memory=memory,  # The memory object that stores the chat history
            get_chat_history=lambda h: h,  # Lambda function to retrieve chat history, here it just returns the history unchanged
        )

        # Pass the query and chat history to the chain to generate a response.
        # Append the query and the generated answer to the chat history for context in future interactions.
        result = qa_chain({'question': query_input, 'chat_history': chat_history})
        chat_history.append((query, result['answer']))  # Update chat history with the current query and answer
        return '', chat_history  # Return an empty string and the updated chat history

    except Exception as e:
        # If an exception occurs, append the exception to the chat history as the response.
        # Return an empty string and the updated chat history.
        chat_history.append((query, str(e)))  # Append the exception message to the chat history
        return '', chat_history  # Return an empty string and the updated chat history


**USE GRADIO FOR THE UI**

In [None]:
import gradio as gr
js = """
function createGradioAnimation() {
    const container = document.createElement('div');
    container.id = 'gradio-animation';
    container.style.cssText = 'font-size: 2.5em; font-weight: bold; text-align: center; margin-bottom: 20px; color: #FFD700;';


    const text = 'Chatbot SGPT';
    text.split('').forEach((char, i) => {
        setTimeout(() => {
            const letter = document.createElement('span');
            letter.style.cssText = 'opacity: 0; transition: opacity 0.5s;';
            letter.innerText = char;
            container.appendChild(letter);
            setTimeout(() => {
                letter.style.opacity = '1';
            }, 50);
        }, i * 250);
    });

    const gradioContainer = document.querySelector('.gradio-container');
    gradioContainer.insertBefore(container, gradioContainer.firstChild);

    return 'Animation created';
}
"""

# Aggiungi stile CSS per colorare i bottoni e altre proprietà di stile
custom_css = """
<style>
    .ask-button, .clear-button {
        color: white;
        border: none;
        padding: 12px 24px;
        text-align: center;
        text-decoration: none;
        display: inline-block;
        font-size: 16px;
        margin: 4px 2px;
        border-radius: 8px;
        transition: all 0.3s ease;
        cursor: pointer;
        box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
    }

    .ask-button {
        background-color: #4CAF50; /* Verde */
    }

    .ask-button:hover {
        background-color: #45a049;
        transform: translateY(-2px);
        box-shadow: 0 6px 8px rgba(0, 0, 0, 0.15);
    }

    .clear-button {
        background-color: #f44336; /* Rosso */
    }

    .clear-button:hover {
        background-color: #e53935;
        transform: translateY(-2px);
        box-shadow: 0 6px 8px rgba(0, 0, 0, 0.15);
    }
</style>

"""

with gr.Blocks(fill_height=True, js=js,css=custom_css,theme=gr.themes.Soft()) as demo:
    chatbot = gr.Chatbot(scale=2, label='Textmining Project - SGPT_Team - 2023/2024')

    with gr.Row():
        with gr.Column(scale=9):
            msg = gr.Textbox(show_label=False, placeholder="Enter question and click on Ask button", lines=2)

        with gr.Column(scale=1):
             submit_btn = gr.Button("Ask", size="sm", elem_classes="ask-button")
             clear_btn = gr.Button("Clear", size="sm", elem_classes="clear-button")

    clear_btn.click(fn=lambda: ("", []), inputs=[], outputs=[msg, chatbot])
    submit_btn.click(create_conversation, inputs=[msg, chatbot], outputs=[msg, chatbot])
    msg.submit(create_conversation, [msg, chatbot], [msg, chatbot])

demo.launch(debug=True, share=True)

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
Running on public URL: https://60693122a3108dee1d.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)
