<a href="https://colab.research.google.com/github/CamiloVga/Codes/blob/main/BotAmbientalV2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:

# Instalación de dependencias necesarias
!pip install transformers huggingface_hub gradio
!pip install -U bitsandbytes
!pip install sentencepiece

# Import necessary packages
from transformers import pipeline, BitsAndBytesConfig, AutoModelForCausalLM, AutoTokenizer
from huggingface_hub import hf_hub_download
import gradio as gr
import time

# Hugging Face Hub repository that hosts the checkpoint used by the chatbot
model_name = "anslin-raj/Llama-2-7b-chat-hf-8-bit"

# Ask transformers/bitsandbytes to load the weights in 8-bit precision
bnb_config = BitsAndBytesConfig(load_in_8bit=True)

# Load the model with quantization.
# `trust_remote_code` is deliberately left at its default (False): it would
# allow Python files from the Hub repository to run during loading.
# NOTE(review): the download log of this cell lists only config, weight and
# tokenizer files, so the checkpoint appears to use the stock architecture;
# if loading ever asks for remote code, audit the repository before enabling it.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
)

# Load the matching tokenizer (slow/sentencepiece implementation)
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)

# Text-generation pipeline shared by every chat cell below.
# Sampling is enabled, so replies differ from run to run.
model_gen = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=256,
    do_sample=True,
    temperature=0.7,
    top_p=0.9,
    repetition_penalty=1.1,
)



Collecting gradio
  Downloading gradio-5.3.0-py3-none-any.whl.metadata (15 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.3-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.4.2 (from gradio)
  Downloading gradio_client-1.4.2-py3-none-any.whl.metadata (7.1 kB)
Collecting httpx>=0.24.1 (from gradio)
  Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)
Collecting huggingface_hub
  Downloading huggingface_hub-0.26.1-py3-none-any.whl.metadata (13 kB)
Collecting markupsafe~=2.0 (from gradio)
  Downloading MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Collecting orjson~=3.0 (from gradio)
  Downloading orjson-3.10.10-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (50 kB)
[

config.json:   0%|          | 0.00/981 [00:00<?, ?B/s]



pytorch_model.bin:   0%|          | 0.00/7.01G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/183 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.59k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

In [4]:
# System prompt that is placed at the top of every conversation transcript
_SYSTEM_PROMPT = """You are AQuaBot, an AI assistant aware of environmental impact.
    You help users with any topic while raising awareness about water consumption
    in AI. Did you know that training GPT-3 consumed 5.4 million liters of water,
    equivalent to the daily consumption of a city of 10,000 people?"""

system_message = {"role": "system", "content": _SYSTEM_PROMPT}

# Estimated water cost per token, in millilitres
_TRAINING_ML_PER_TOKEN = 0.0000309   # share attributed to training
_INFERENCE_ML_PER_TOKEN = 0.05       # share attributed to inference

WATER_PER_TOKEN = {
    "input_training": _TRAINING_ML_PER_TOKEN,
    "output_training": _TRAINING_ML_PER_TOKEN,
    "input_inference": _INFERENCE_ML_PER_TOKEN,
    "output_inference": _INFERENCE_ML_PER_TOKEN,
}

# Mutable conversation state shared by the chat callbacks
messages = [system_message]      # transcript, always starting with the system prompt
total_water_consumption = 0      # running estimate, in ml

def calculate_tokens(text):
    """Estimate how many tokens ``text`` contains.

    The estimate is the number of whitespace-separated words plus one extra
    token for every four characters (integer division). It is a rough
    heuristic and does not use the model's tokenizer.
    """
    word_count = len(text.split())
    char_blocks = len(text) // 4
    return word_count + char_blocks

def calculate_water_consumption(text, is_input=True):
    """Estimate the water (in ml) attributed to ``text``.

    The token estimate from ``calculate_tokens`` is multiplied by the sum of
    the training and inference rates in ``WATER_PER_TOKEN``. ``is_input``
    selects the input rates when truthy and the output rates otherwise.
    """
    token_count = calculate_tokens(text)
    training_key, inference_key = (
        ("input_training", "input_inference")
        if is_input
        else ("output_training", "output_inference")
    )
    return token_count * (WATER_PER_TOKEN[training_key] + WATER_PER_TOKEN[inference_key])

def _water_badge_html(total_ml):
    """Return the fixed-position HTML badge displaying ``total_ml`` millilitres."""
    return f"""
    <div style="position: fixed; top: 20px; right: 20px;
                background-color: white; padding: 15px;
                border: 2px solid #ff0000; border-radius: 10px;
                box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
        <div style="color: #ff0000; font-size: 24px; font-weight: bold;">
            💧 {total_ml:.4f} ml
        </div>
        <div style="color: #666; font-size: 14px;">
            Water Consumed
        </div>
    </div>
    """


def generate_response(user_input, chat_history):
    """Produce the assistant's reply and refresh the water-consumption badge.

    Args:
        user_input: Text submitted by the user.
        chat_history: List of ``(user, assistant)`` tuples shown in the chat
            widget. It is extended in place; ``None`` is treated as empty.

    Returns:
        A tuple ``(chat_history, water_message)``, where ``water_message`` is
        the HTML badge with the accumulated estimate stored in the global
        ``total_water_consumption``.

    Side effects:
        On success, appends the user and assistant turns to the global
        ``messages`` list and increases ``total_water_consumption``. Nothing
        is modified when the input is blank or when generation raises.
    """
    global total_water_consumption

    if chat_history is None:
        chat_history = []

    # A blank submission carries nothing to answer: skip the model call and
    # leave the transcript and the counter untouched.
    if not user_input or not user_input.strip():
        return chat_history, _water_badge_html(total_water_consumption)

    user_turn = {"role": "user", "content": user_input}

    # Build the prompt from a temporary list so that a failing model call does
    # not leave an unanswered user turn behind in the shared transcript.
    prompt_parts = []
    for m in messages + [user_turn]:
        if m["role"] == "system":
            prompt_parts.append(f"<START SYSTEM MESSAGE>\n{m['content']}\n<END SYSTEM MESSAGE>\n\n")
        elif m["role"] == "user":
            prompt_parts.append(f"User: {m['content']}\n")
        else:
            prompt_parts.append(f"Assistant: {m['content']}\n")
    prompt_parts.append("Assistant:")
    prompt = "".join(prompt_parts)

    # Generate only the continuation (the prompt itself is not echoed back)
    outputs = model_gen(
        prompt,
        max_new_tokens=256,
        return_full_text=False
    )
    generated_text = outputs[0]['generated_text'].strip()

    # No stop sequence is configured, so the continuation may run on into
    # further made-up turns. Keep only the text before the next speaker label.
    assistant_response = generated_text
    for marker in ("\nUser:", "\nAssistant:"):
        assistant_response = assistant_response.split(marker, 1)[0]
    assistant_response = assistant_response.strip()

    # The estimate covers everything that was generated, including any part
    # trimmed from the displayed reply.
    total_water_consumption += calculate_water_consumption(user_input, True)
    total_water_consumption += calculate_water_consumption(generated_text, False)

    # Commit both turns to the shared transcript and to the visible history
    messages.append(user_turn)
    messages.append({"role": "assistant", "content": assistant_response})
    chat_history.append((user_input, assistant_response))

    return chat_history, _water_badge_html(total_water_consumption)

# Create Gradio interface

# Badge shown before the first message and again after "Clear Chat"
INITIAL_WATER_HTML = """
        <div style="position: fixed; top: 20px; right: 20px;
                    background-color: white; padding: 15px;
                    border: 2px solid #ff0000; border-radius: 10px;
                    box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
            <div style="color: #ff0000; font-size: 24px; font-weight: bold;">
                💧 0.0000 ml
            </div>
            <div style="color: #666; font-size: 14px;">
                Water Consumed
            </div>
        </div>
    """

with gr.Blocks(css="div.gradio-container {background-color: #f0f2f6}") as demo:
    # Page header
    gr.HTML("""
        <div style="text-align: center; max-width: 800px; margin: 0 auto; padding: 20px;">
            <h1 style="color: #2d333a;">AQuaBot</h1>
            <p style="color: #4a5568;">
                Welcome to AQuaBot - An AI assistant that helps raise awareness about water
                consumption in language models. Did you know that training GPT-3 consumed
                5.4 million liters of water, equivalent to the daily consumption of a small city?
                Each conversation with models like ChatGPT can consume around 500ml of water
                for every 20-50 questions.
            </p>
        </div>
    """)

    # Create chat components
    chatbot = gr.Chatbot()
    message = gr.Textbox(
        placeholder="Type your message here... (Water counter will start working as you type)",
        show_label=False
    )
    show_water = gr.HTML(INITIAL_WATER_HTML)
    clear = gr.Button("Clear Chat")

    # Add footer with citation and disclaimer
    gr.HTML("""
        <div style="text-align: center; max-width: 800px; margin: 20px auto; padding: 20px;
                    background-color: #f8f9fa; border-radius: 10px;">
            <div style="margin-bottom: 15px;">
                <p style="color: #666; font-size: 14px; font-style: italic;">
                    Water consumption calculations are based on the study:<br>
                    Li, P. et al. (2023). Making AI Less Thirsty: Uncovering and Addressing the Secret Water
                    Footprint of AI Models. ArXiv Preprint,
                    <a href="https://arxiv.org/abs/2304.03271" target="_blank">https://arxiv.org/abs/2304.03271</a>
                </p>
            </div>
            <div style="border-top: 1px solid #ddd; padding-top: 15px;">
                <p style="color: #666; font-size: 14px;">
                    <strong>Important note:</strong> This application uses a compressed version of
                    Llama 2-7b instead of GPT-3 for availability and cost reasons. However,
                    the water consumption calculations per token (input/output) are based on the
                    conclusions from the cited paper.
                </p>
            </div>
        </div>
    """)

    def submit(user_input, chat_history):
        """Answer the message and empty the textbox.

        Returns the updated chat history, the refreshed water badge and an
        empty string that replaces the textbox content.
        """
        updated_history, water_html = generate_response(user_input, chat_history)
        return updated_history, water_html, ""

    def clear_chat():
        """Reset the widgets together with the conversation state behind them.

        Without resetting ``messages`` and ``total_water_consumption`` the next
        prompt would still contain the cleared conversation and the badge would
        jump back to the old total.
        """
        global total_water_consumption
        messages[:] = [system_message]   # same list object, back to the system prompt only
        total_water_consumption = 0
        return [], INITIAL_WATER_HTML

    # Configure event handlers
    message.submit(submit, [message, chatbot], [chatbot, show_water, message])
    clear.click(clear_chat, None, [chatbot, show_water])

# Launch the application
demo.launch()



Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://c019f43cebc4eabe5d.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [2]:
# System prompt (Spanish) placed at the top of every conversation transcript.
# Fixes the stray English word "creates" -> "creas" and adds the opening "¿".
mensaje_sistema = {
    "role": "system",
    "content": """Eres AQuaBot, un asistente de IA consciente del impacto ambiental.
    Ayudas a los usuarios con cualquier tema mientras creas conciencia sobre el consumo
    de agua en la IA. ¿Sabías que el entrenamiento de GPT-3 consumió 5.4 millones de litros
    de agua, equivalente al consumo diario de una ciudad de 10,000 personas?"""
}

# Constants for the water consumption estimate
AGUA_POR_TOKEN = {
    "input_entrenamiento": 0.0000309,  # ml per input token (training)
    "output_entrenamiento": 0.0000309,  # ml per output token (training)
    "input_inferencia": 0.05,          # ml per input token during inference
    "output_inferencia": 0.05          # ml per output token during inference
}

# Mutable conversation state shared by the chat callbacks
mensajes = [mensaje_sistema]
consumo_total_agua = 0  # running estimate, in ml

def calcular_tokens(texto):
    """Estimate how many tokens ``texto`` contains.

    Adds the number of whitespace-separated words to one extra token per four
    characters (integer division). This is a rough heuristic, not the model's
    tokenizer.
    """
    palabras = texto.split()
    bloques_de_cuatro = len(texto) // 4
    return len(palabras) + bloques_de_cuatro

def calcular_consumo_agua(texto, es_entrada=True):
    """Estimate the water (in ml) attributed to ``texto``.

    Multiplies the token estimate from ``calcular_tokens`` by the sum of the
    training and inference rates in ``AGUA_POR_TOKEN``. ``es_entrada`` selects
    the input rates when truthy and the output rates otherwise.
    """
    cantidad_tokens = calcular_tokens(texto)
    clave_entrenamiento, clave_inferencia = (
        ("input_entrenamiento", "input_inferencia")
        if es_entrada
        else ("output_entrenamiento", "output_inferencia")
    )
    return cantidad_tokens * (AGUA_POR_TOKEN[clave_entrenamiento] + AGUA_POR_TOKEN[clave_inferencia])

def _html_consumo_agua(total_ml):
    """Return the fixed-position HTML badge displaying ``total_ml`` millilitres."""
    return f"""
    <div style="position: fixed; top: 20px; right: 20px;
                background-color: white; padding: 15px;
                border: 2px solid #ff0000; border-radius: 10px;
                box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
        <div style="color: #ff0000; font-size: 24px; font-weight: bold;">
            💧 {total_ml:.4f} ml
        </div>
        <div style="color: #666; font-size: 14px;">
            Agua Consumida
        </div>
    </div>
    """


def generar_respuesta(entrada_usuario, historial_chat):
    """Produce the assistant's reply and refresh the water-consumption badge.

    Args:
        entrada_usuario: Text submitted by the user.
        historial_chat: List of ``(user, assistant)`` tuples shown in the chat
            widget. It is extended in place; ``None`` is treated as empty.

    Returns:
        A tuple ``(historial_chat, mensaje_agua)``, where ``mensaje_agua`` is
        the HTML badge with the accumulated estimate stored in the global
        ``consumo_total_agua``.

    Side effects:
        On success, appends the user and assistant turns to the global
        ``mensajes`` list and increases ``consumo_total_agua``. Nothing is
        modified when the input is blank or when generation raises.
    """
    global consumo_total_agua

    if historial_chat is None:
        historial_chat = []

    # A blank submission carries nothing to answer: skip the model call and
    # leave the transcript and the counter untouched.
    if not entrada_usuario or not entrada_usuario.strip():
        return historial_chat, _html_consumo_agua(consumo_total_agua)

    turno_usuario = {"role": "user", "content": entrada_usuario}

    # Build the prompt from a temporary list so that a failing model call does
    # not leave an unanswered user turn behind in the shared transcript.
    partes_prompt = []
    for m in mensajes + [turno_usuario]:
        if m["role"] == "system":
            partes_prompt.append(f"<INICIO MENSAJE SISTEMA>\n{m['content']}\n<FIN MENSAJE SISTEMA>\n\n")
        elif m["role"] == "user":
            partes_prompt.append(f"Usuario: {m['content']}\n")
        else:
            partes_prompt.append(f"Asistente: {m['content']}\n")
    partes_prompt.append("Asistente:")
    prompt = "".join(partes_prompt)

    # Generate only the continuation (the prompt itself is not echoed back)
    salidas = model_gen(
        prompt,
        max_new_tokens=256,
        return_full_text=False
    )
    texto_generado = salidas[0]['generated_text'].strip()

    # No stop sequence is configured, so the continuation may run on into
    # further made-up turns. Keep only the text before the next speaker label.
    respuesta_asistente = texto_generado
    for marcador in ("\nUsuario:", "\nAsistente:"):
        respuesta_asistente = respuesta_asistente.split(marcador, 1)[0]
    respuesta_asistente = respuesta_asistente.strip()

    # The estimate covers everything that was generated, including any part
    # trimmed from the displayed reply.
    consumo_total_agua += calcular_consumo_agua(entrada_usuario, True)
    consumo_total_agua += calcular_consumo_agua(texto_generado, False)

    # Commit both turns to the shared transcript and to the visible history
    mensajes.append(turno_usuario)
    mensajes.append({"role": "assistant", "content": respuesta_asistente})
    historial_chat.append((entrada_usuario, respuesta_asistente))

    return historial_chat, _html_consumo_agua(consumo_total_agua)

# Create the Gradio interface (Spanish version)

# Badge shown before the first message and again after "Limpiar Chat"
HTML_AGUA_INICIAL = """
        <div style="position: fixed; top: 20px; right: 20px;
                    background-color: white; padding: 15px;
                    border: 2px solid #ff0000; border-radius: 10px;
                    box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
            <div style="color: #ff0000; font-size: 24px; font-weight: bold;">
                💧 0.0000 ml
            </div>
            <div style="color: #666; font-size: 14px;">
                Agua Consumida
            </div>
        </div>
    """

with gr.Blocks(css="div.gradio-container {background-color: #f0f2f6}") as demo:
    # Page header
    gr.HTML("""
        <div style="text-align: center; max-width: 800px; margin: 0 auto; padding: 20px;">
            <h1 style="color: #2d333a;">AQuaBot</h1>
            <p style="color: #4a5568;">
                Bienvenido a AQuaBot - Un asistente de IA que ayuda a crear conciencia sobre el consumo
                de agua en los modelos de lenguaje. ¿Sabías que el entrenamiento de GPT-3 consumió
                5.4 millones de litros de agua, equivalente al consumo diario de una ciudad pequeña?
                Cada conversación con modelos como ChatGPT puede consumir alrededor de 500ml de agua
                por cada 20-50 preguntas.
            </p>
        </div>
    """)

    # Create the chat components
    chatbot = gr.Chatbot()
    mensaje = gr.Textbox(
        placeholder="Escribe tu mensaje aquí... (El contador de agua comenzará a funcionar mientras escribes)",
        show_label=False
    )
    mostrar_agua = gr.HTML(HTML_AGUA_INICIAL)
    limpiar = gr.Button("Limpiar Chat")

    def enviar(entrada_usuario, historial_chat):
        """Answer the message and empty the textbox.

        Returns the updated chat history, the refreshed water badge and an
        empty string that replaces the textbox content.
        """
        historial_actualizado, html_agua = generar_respuesta(entrada_usuario, historial_chat)
        return historial_actualizado, html_agua, ""

    def limpiar_chat():
        """Reset the widgets together with the conversation state behind them.

        Without resetting ``mensajes`` and ``consumo_total_agua`` the next
        prompt would still contain the cleared conversation and the badge would
        jump back to the old total.
        """
        global consumo_total_agua
        mensajes[:] = [mensaje_sistema]   # same list object, back to the system prompt only
        consumo_total_agua = 0
        return [], HTML_AGUA_INICIAL

    # Configure the event handlers
    mensaje.submit(enviar, [mensaje, chatbot], [chatbot, mostrar_agua, mensaje])
    limpiar.click(limpiar_chat, None, [chatbot, mostrar_agua])

# Launch the application
demo.launch()



Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://e30f5d599e08640c54.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


