<a href="https://colab.research.google.com/github/CamiloVga/Curso-Inteligencia-Artificial/blob/main/BotAmbientalV3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:

# Install the required dependencies
!pip install transformers huggingface_hub gradio accelerate

# Import necessary packages
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
from huggingface_hub import hf_hub_download
import gradio as gr
import torch
import logging
import sys
from accelerate import infer_auto_device_map, init_empty_weights
import time

# Logging setup: INFO and above, each record prefixed with timestamp and level.
logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s', level=logging.INFO)

# Module-level logger used by every step below.
logger = logging.getLogger(__name__)

# Hugging Face Hub identifier of the causal language model to load
model_name = "microsoft/phi-2"

try:
    logger.info("Starting model initialization...")

    # Use the GPU when PyTorch reports one, otherwise fall back to the CPU
    if torch.cuda.is_available():
        device = "cuda"
    else:
        device = "cpu"
    logger.info(f"Using device: {device}")

    # The TF32 switches are only touched on the CUDA path
    if device == "cuda":
        torch.backends.cuda.matmul.allow_tf32 = True
        torch.backends.cudnn.allow_tf32 = True

    # Tokenizer first: it is also used later for token counting
    logger.info("Loading tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    logger.info("Tokenizer loaded successfully")

    # Model weights: half precision on CUDA, full precision on CPU
    logger.info("Loading model...")
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype={"cuda": torch.float16}.get(device, torch.float32),
        device_map="auto",
        trust_remote_code=True,
    )
    logger.info("Model loaded successfully")

    # Text-generation pipeline carrying the default sampling settings
    logger.info("Creating generation pipeline...")
    model_gen = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        repetition_penalty=1.1,
        device_map="auto",
    )
    logger.info("Pipeline created successfully")

except Exception as e:
    # Log the failure, then let it propagate so the run stops here
    logger.error(f"Error during initialization: {e!s}")
    raise

# System prompt that opens every conversation sent to the model
system_message = dict(
    role="system",
    content="""You are AQuaBot, an AI assistant aware of environmental impact.
    You help users with any topic while raising awareness about water consumption
    in AI. Did you know that training GPT-3 consumed 5.4 million liters of water,
    equivalent to the daily consumption of a city of 10,000 people?""",
)

# Per-token water cost, keyed by direction (input/output) and phase
# (training/inference). NOTE(review): the UI labels the running total "ml",
# so these are presumably millilitres per token; confirm against the cited paper.
WATER_PER_TOKEN = dict(
    input_training=3.09e-05,
    output_training=3.09e-05,
    input_inference=0.05,
    output_inference=0.05,
)

# Mutable conversation state: the transcript starts with the system prompt,
# and the running water total starts at zero.
total_water_consumption = 0
messages = [system_message]

def calculate_tokens(text):
    """Return the number of tokens in *text*.

    The count comes from the module-level tokenizer. If tokenizing raises,
    the error is logged and a rough estimate is returned instead:
    whitespace-separated words plus one quarter of the character count.
    """
    try:
        token_count = len(tokenizer.encode(text))
    except Exception as exc:
        logger.error(f"Error calculating tokens: {exc!s}")
        # Fallback approximation when the tokenizer cannot be used
        word_count = len(text.split())
        char_share = len(text) // 4
        return word_count + char_share
    return token_count

def calculate_water_consumption(text, is_input=True):
    """Return the water cost attributed to *text*.

    The token count of *text* is multiplied by the sum of the training and
    inference rates from ``WATER_PER_TOKEN``. ``is_input`` selects the
    ``input_*`` rates when true and the ``output_*`` rates otherwise.
    """
    token_count = calculate_tokens(text)
    if is_input:
        training_key, inference_key = "input_training", "input_inference"
    else:
        training_key, inference_key = "output_training", "output_inference"
    return token_count * (WATER_PER_TOKEN[training_key] + WATER_PER_TOKEN[inference_key])

def _render_water_message(total):
    """Return the floating counter HTML showing *total* with four decimals."""
    return f"""
        <div style="position: fixed; top: 20px; right: 20px;
                    background-color: white; padding: 15px;
                    border: 2px solid #ff0000; border-radius: 10px;
                    box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
            <div style="color: #ff0000; font-size: 24px; font-weight: bold;">
                💧 {total:.4f} ml
            </div>
            <div style="color: #666; font-size: 14px;">
                Water Consumed
            </div>
        </div>
        """


def _build_prompt(history):
    """Flatten role-tagged message dicts into the plain-text prompt for the model."""
    parts = []
    for m in history:
        if m["role"] == "system":
            parts.append(f"<START SYSTEM MESSAGE>\n{m['content']}\n<END SYSTEM MESSAGE>\n\n")
        elif m["role"] == "user":
            parts.append(f"User: {m['content']}\n")
        else:
            parts.append(f"Assistant: {m['content']}\n")
    # Trailing cue so the model continues in the assistant's voice
    parts.append("Assistant:")
    return "".join(parts)


@torch.inference_mode()
def generate_response(user_input, chat_history):
    """Answer *user_input* and update the running water counter.

    Args:
        user_input: Text typed by the user.
        chat_history: List of ``(user_text, assistant_text)`` tuples shown in
            the chat widget. It is extended in place; ``None`` is treated as
            an empty history.

    Returns:
        A ``(chat_history, water_html)`` pair: the updated history and the
        HTML snippet displaying the cumulative water total.

    Side effects:
        Appends to the module-level ``messages`` transcript and adds to
        ``total_water_consumption``.

    Errors raised while generating are logged and reported to the user as a
    chat entry; they are not propagated.
    """
    global total_water_consumption, messages

    if chat_history is None:
        chat_history = []

    # Blank submissions have nothing to answer: skip the model call and
    # leave both the transcript and the counter untouched.
    if not user_input or not user_input.strip():
        return chat_history, _render_water_message(total_water_consumption)

    user_turn = None
    try:
        logger.info("Generating response for user input...")

        # Account for the water attributed to the input tokens
        total_water_consumption += calculate_water_consumption(user_input, True)

        # Record the user's turn in the transcript sent to the model
        user_turn = {"role": "user", "content": user_input}
        messages.append(user_turn)

        logger.info("Generating model response...")
        outputs = model_gen(
            _build_prompt(messages),
            max_new_tokens=256,
            return_full_text=False,
            pad_token_id=tokenizer.eos_token_id,
        )
        logger.info("Model response generated successfully")

        assistant_response = outputs[0]['generated_text'].strip()

        # Account for the water attributed to the generated tokens
        total_water_consumption += calculate_water_consumption(assistant_response, False)

        messages.append({"role": "assistant", "content": assistant_response})
        chat_history.append((user_input, assistant_response))

        return chat_history, _render_water_message(total_water_consumption)

    except Exception as e:
        logger.exception(f"Error in generate_response: {str(e)}")
        # Remove the unanswered user turn so the next prompt does not contain
        # two consecutive "User:" lines.
        if user_turn is not None and messages and messages[-1] is user_turn:
            messages.pop()
        chat_history.append((user_input, f"An error occurred: {str(e)}"))
        # Return an HTML string (not a UI component) reflecting the current
        # total, so the counter stays accurate after a failure.
        return chat_history, _render_water_message(total_water_consumption)

# Create Gradio interface
try:
    logger.info("Creating Gradio interface...")

    # Counter markup for a zero total; shared by the initial render and the
    # Clear button so the two can never drift apart.
    _INITIAL_WATER_HTML = """
            <div style="position: fixed; top: 20px; right: 20px;
                        background-color: white; padding: 15px;
                        border: 2px solid #ff0000; border-radius: 10px;
                        box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
                <div style="color: #ff0000; font-size: 24px; font-weight: bold;">
                    💧 0.0000 ml
                </div>
                <div style="color: #666; font-size: 14px;">
                    Water Consumed
                </div>
            </div>
        """

    with gr.Blocks(css="div.gradio-container {background-color: #f0f2f6}") as demo:
        # Page header
        gr.HTML("""
            <div style="text-align: center; max-width: 800px; margin: 0 auto; padding: 20px;">
                <h1 style="color: #2d333a;">AQuaBot</h1>
                <p style="color: #4a5568;">
                    Welcome to AQuaBot - An AI assistant that helps raise awareness about water
                    consumption in language models.
                </p>
            </div>
        """)

        chatbot = gr.Chatbot()
        message = gr.Textbox(
            placeholder="Type your message here...",
            show_label=False
        )
        show_water = gr.HTML(_INITIAL_WATER_HTML)
        clear = gr.Button("Clear Chat")

        # Add footer with citation and disclaimer
        gr.HTML("""
            <div style="text-align: center; max-width: 800px; margin: 20px auto; padding: 20px;
                        background-color: #f8f9fa; border-radius: 10px;">
                <div style="margin-bottom: 15px;">
                    <p style="color: #666; font-size: 14px; font-style: italic;">
                        Water consumption calculations are based on the study:<br>
                        Li, P. et al. (2023). Making AI Less Thirsty: Uncovering and Addressing the Secret Water
                        Footprint of AI Models. ArXiv Preprint,
                        <a href="https://arxiv.org/abs/2304.03271" target="_blank">https://arxiv.org/abs/2304.03271</a>
                    </p>
                </div>
                <div style="border-top: 1px solid #ddd; padding-top: 15px;">
                    <p style="color: #666; font-size: 14px;">
                        <strong>Important note:</strong> This application uses Microsoft's Phi-2 model
                        instead of GPT-3 for availability and cost reasons. However,
                        the water consumption calculations per token (input/output) are based on the
                        conclusions from the cited paper.
                    </p>
                </div>
            </div>
        """)

        def submit(user_input, chat_history):
            """Forward a submitted message to the response generator."""
            return generate_response(user_input, chat_history)

        def reset_conversation():
            """Clear the chat widget AND the module-level conversation state.

            Without resetting ``messages`` and ``total_water_consumption`` the
            model would still be prompted with the "cleared" conversation and
            the counter would jump back to the old total on the next reply.
            """
            global messages, total_water_consumption
            messages = [system_message]
            total_water_consumption = 0
            return [], _INITIAL_WATER_HTML

        # NOTE(review): the transcript and counter are module globals, so every
        # browser session connected to this app shares them; per-session state
        # (e.g. gr.State) would be needed to isolate users.

        # Configure event handlers
        message.submit(
            submit, [message, chatbot], [chatbot, show_water]
        ).then(
            # Empty the textbox once the reply has been rendered
            lambda: "", None, message
        )
        clear.click(reset_conversation, None, [chatbot, show_water])

    logger.info("Gradio interface created successfully")

    # Launch the application
    logger.info("Launching application...")
    demo.launch()

except Exception as e:
    logger.error(f"Error in Gradio interface creation: {str(e)}")
    raise

Collecting gradio
  Downloading gradio-5.4.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.4-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.4.2 (from gradio)
  Downloading gradio_client-1.4.2-py3-none-any.whl.metadata (7.1 kB)
Collecting httpx>=0.24.1 (from gradio)
  Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)
Collecting huggingface_hub
  Downloading huggingface_hub-0.26.2-py3-none-any.whl.metadata (13 kB)
Collecting markupsafe~=2.0 (from gradio)
  Downloading MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Collecting orjson~=3.0 (from gradio)
  Downloading orjson-3.10.10-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (50 kB)
[

tokenizer_config.json:   0%|          | 0.00/7.34k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/1.08k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/99.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/735 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/35.7k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/564M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]



Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://f56824bdb1fd59144f.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
