# Módulo 4. Estructuras de datos en Python

In [None]:
### LISTS

# Examples:
# A list literal uses SQUARE BRACKETS and may mix element types.
my_list = [1, 2.1, "hello", [3, 4]]

# Index access: first and second element, printed one per line.
print(my_list[0], my_list[1], sep="\n")   # 1 / 2.1

# Slicing: elements at index 1 and 2 (the stop index 3 is excluded).
print(my_list[1:3])   # [2.1, 'hello']

# Extending a list in place with every element of another list.
new_tasks = ["Go for a walk", "Read a book"]
tasks = ["Take a nap"]
tasks.extend(new_tasks)   # ['Take a nap', 'Go for a walk', 'Read a book']
print(tasks)



1
2.1
[2.1, 'hello']
['Take a nap', 'Go for a walk', 'Read a book']


In [None]:
# Practical application: a to-do list
tasks = ["Buy groceries", "Finish report", "Call Mom"]
tasks += ["Programar cita con el dentista"]   # one-item in-place extension, same effect as append()
del tasks[tasks.index("Finish report")]       # drop the first matching entry, like remove()
tasks.sort()                                  # reorders the list in place
print(tasks)

['Buy groceries', 'Call Mom', 'Programar cita con el dentista']


In [None]:
# TUPLES: unlike lists, they cannot be modified after creation

# Create a tuple (written WITH PARENTHESES)
my_tuple = (10, 20, "python")

# Index access (a negative index counts from the end)
print(my_tuple[-1])   # python

# Slicing (an omitted start means "from the beginning")
print(my_tuple[:2])   # (10, 20)

# Any attempt to modify the tuple fails:
#   item assignment such as my_tuple[1] = 30 raises TypeError
#   tuples have no append() method, so calling it raises AttributeError

# Practical application: geographic coordinates.
# Latitude and longitude of San Francisco; because the pair is a tuple,
# later code cannot change it in place.
coordinates = (37.7749, -122.4194)

python
(10, 20)


In [None]:
# SETS

# Basic example:
# A set literal uses CURLY BRACES; duplicates collapse into one element.
my_set = {1, 2, 3, 3}
print(my_set)   # {1, 2, 3}

# Set operations
a = {1, 2, 3}
b = {3, 4, 5}

a.add(6)       # add an element
a.remove(2)    # remove an element
print(a | b)   # union        -> {1, 3, 4, 5, 6}
print(a & b)   # intersection -> {3}
print(a - b)   # difference   -> {1, 6}

# Real-world application: tracking likes on a social network
liked_by = {12345, 67890, 98765}   # unique user IDs
print(len(liked_by))               # 3 unique users

{1, 2, 3}
{1, 3, 4, 5, 6}
{3}
{1, 6}
3


In [None]:
# DICTIONARIES

# Basic example: each key is mapped to its value with ":"
my_dict = {
    "name": "Alice",
    "age": 30,
    "city": "New York",
}

# Lookup by key
print(my_dict["age"])   # 30

# Useful methods
print(my_dict.get("city"))   # New York
print(my_dict.keys())        # dict_keys(['name', 'age', 'city'])
print(my_dict.values())      # dict_values(['Alice', 30, 'New York'])
print(my_dict.items())       # dict_items([('name', 'Alice'), ('age', 30), ('city', 'New York')])

# Modification: assigning to an existing key overwrites its value
my_dict["age"] = 31
print(my_dict["age"])  # 31

# Real-world application: product catalog keyed by product code
products = {}
products["P101"] = {"name": "Laptop", "price": 999.99}
products["P102"] = {"name": "Smartphone", "price": 599.99}

# Nested lookup: product first, then one of its fields
print(products["P101"]["price"])  # 999.99


30
New York
dict_keys(['name', 'age', 'city'])
dict_values(['Alice', 30, 'New York'])
dict_items([('name', 'Alice'), ('age', 30), ('city', 'New York')])
31
999.99


In [None]:
# Lists (mutable)

shopping_list = ["apples", "bananas", "milk"]

# Add an item at the end
shopping_list += ["eggs"]

# Remove an item by value (first occurrence)
shopping_list.pop(shopping_list.index("apples"))

print(shopping_list)
# ['bananas', 'milk', 'eggs']

['bananas', 'milk', 'eggs']


In [14]:
# Dictionaries (mutable)

item_quantities = dict(apples=3, bananas=1)

# Add an item together with its quantity
item_quantities.update(eggs=12)

# Update a quantity
item_quantities["bananas"] = item_quantities["bananas"] + 2

# Remove an item
item_quantities.pop("apples")

print(item_quantities)
# {'bananas': 3, 'eggs': 12}

{'bananas': 3, 'eggs': 12}


In [15]:
# Complete practical example: shopping app (mutable lists and dictionaries)

item_quantities = {"apples": 3, "bananas": 1}   # quantity per item
shopping_list = ["apples", "bananas", "milk"]   # items to buy

# The user adds an item
shopping_list.extend(["eggs"])
item_quantities.update({"eggs": 12})

# The user raises the banana quantity
item_quantities["bananas"] = item_quantities["bananas"] + 2

# The user removes apples from both structures
del shopping_list[shopping_list.index("apples")]
item_quantities.pop("apples")

# Results, one structure per line:
#   ['bananas', 'milk', 'eggs']
#   {'bananas': 3, 'eggs': 12}
print(shopping_list, item_quantities, sep="\n")

['bananas', 'milk', 'eggs']
{'bananas': 3, 'eggs': 12}


In [16]:
# Tuples (immutable)

# Tuples hold fixed data
birth_date = (1990, 12, 25)          # year, month, day
coordinates = (37.7749, -122.4194)   # latitude and longitude of San Francisco

# Element access, one value per line
print(coordinates[0], birth_date[1], sep="\n")   # 37.7749 / 12

37.7749
12


In [17]:
# Sets (mutable by default); frozenset is the immutable variant

# Basic set
unique_colors = {"red", "green", "blue"}

# Remove duplicates from a list by unpacking it into a set literal
numbers = [1, 2, 2, 3, 4, 4, 5]
unique_numbers = {*numbers}

print(unique_numbers)  # {1, 2, 3, 4, 5}

# Set operations, printed one result per line:
#   union        -> {1, 2, 3, 4, 5}
#   intersection -> {3}
#   difference   -> {1, 2}
A = {1, 2, 3}
B = {3, 4, 5}
print(A.union(B), A.intersection(B), A.difference(B), sep="\n")

{1, 2, 3, 4, 5}
{1, 2, 3, 4, 5}
{3}
{1, 2}


In [18]:
# MUTABLE VS IMMUTABLE

# MUTABLE

# To-do list: lists can grow in place
tasks = ["Buy groceries", "Finish report"]
tasks += ["Call mom"]
print(tasks)  # ['Buy groceries', 'Finish report', 'Call mom']

# Dictionary whose values can be updated
stock_prices = dict(AAPL=175, GOOG=2800)
stock_prices.update(AAPL=180)  # overwrite the existing value
print(stock_prices)  # {'AAPL': 180, 'GOOG': 2800}

# IMMUTABLE

# frozenset: a set that cannot be changed after creation
fs = frozenset((1, 2, 3))
print(fs)  # frozenset({1, 2, 3})

# Being hashable, it can be used as a dictionary key
data = {}
data[fs] = "grupo de números"
print(data[fs])  # grupo de números

['Buy groceries', 'Finish report', 'Call mom']
{'AAPL': 180, 'GOOG': 2800}
frozenset({1, 2, 3})
grupo de números


In [None]:
# Coding challenge: shopping cart

shopping_cart = []

# Fill the cart. extend() adds several elements in a single call,
# whereas append() accepts exactly one element per call.
shopping_cart.extend(["apple", "banana", "milk"])

# Print a header, then walk the list and print each item on its own line.
print("Shopping Cart:")

for item in shopping_cart:
    print(item)

Shopping Cart:
apple
banana
milk


In [36]:
# MORE DICTIONARY EXAMPLES
from collections import Counter

# Real-world use cases

# Gradebook: student name -> grade
calificaciones = {"Ana": 95, "Luis": 88, "María": 76}
# Supports looking up grades, computing averages or finding the best results.


# Social-network profile

perfil = {
    "usuario": "juan23",
    "nombre": "Juan Pérez",
    "edad": 25,
    "intereses": ["música", "programación"],
    "amigos": ["ana99", "luis88"]
}
# Represents a user's information; values may themselves be lists.


# Product catalog of an online shop: product code -> product details

productos = {
    "P101": {"nombre": "Libro Python", "precio": 29.99, "stock": 15},
    "P102": {"nombre": "Auriculares", "precio": 59.99, "stock": 8}
}
# Makes lookups, inventory management and filtering of articles easier.


# Language translation: source word -> translated word

traducciones = {"hello": "hola", "goodbye": "adiós"}
# Basis for translation apps.


# Configuration settings

configuracion = {
    "volumen": 80,
    "resolucion": "1920x1080",
    "dificultad": "media"
}
# Stores user settings for games or applications.


# Counting word frequency in a text

texto = "hola hola python"
# Count the whitespace-separated words of `texto` instead of typing the
# result by hand, so the dictionary always matches the text.
# dict(...) turns the Counter into a plain dictionary.
frecuencia = dict(Counter(texto.split()))   # {'hola': 2, 'python': 1}
# Very useful in natural language processing.

In [49]:
# Coding challenge: create a Python dictionary

# Catalog keyed by SKU; each value holds the product's name, price and quantity.
product_catalog = {
    "SKU123": {"name": "Widget A", "price": 19.99, "quantity": 50},
    "SKU456": {"name": "Gadget B", "price": 34.95, "quantity": 25},
    "SKU789": {"name": "Gizmo C", "price": 9.99, "quantity": 100},
}

# Print the price of the product whose SKU is being looked up.

sku_to_find = "SKU123"

# Test membership in the catalog rather than comparing against one literal
# SKU: the lookup then works for every SKU in the catalog, and an unknown
# SKU is reported instead of printing nothing.
if sku_to_find in product_catalog:
    product = product_catalog[sku_to_find]
    print(f"The price of {product['name']} is ${product['price']}")
else:
    print(f"No product found with SKU {sku_to_find}")

The price of Widget A is $19.99


In [53]:
## MORE SETS

# Common set operations
# Union (the | operator): every unique element of both sets.

set1 = {1, 2, 3}
set2 = {3, 4, 5}
combined = set1 | set2  # {1, 2, 3, 4, 5}
print(combined)

# Intersection (the & operator): only the elements present in both sets.

common = set1 & set2  # {3}
print(common)

# Removing duplicates from a list: feed the list into an empty set.

numbers = [1, 2, 2, 3, 3, 4]
unique_numbers = set()
unique_numbers.update(numbers)  # {1, 2, 3, 4}
print(unique_numbers)

{1, 2, 3, 4, 5}
{3}
{1, 2, 3, 4}


In [None]:
# List operations: each statement below demonstrates one way to change a list in place

my_list = ["tarea1", "tarea2", "tarea3"]

my_list.append("tarea4")      # Adds an element at the end
my_list.insert(1, "tarea1.5") # Inserts an element at the given index
my_list.pop()                 # Removes and returns the last element; pop(i) removes and returns the element at index i instead
my_list.remove("tarea2")      # Removes the first occurrence of the value
del my_list[0]                # Removes the element at the given index
print(my_list)

my_list.clear()               # Removes every element from the list

# Not shown here: my_list.sort() sorts the list in place (numbers are sorted in ascending order by default)

print(my_list)

['tarea1.5', 'tarea3']
[]


In [56]:
# Dictionaries

# Practical example: storing contacts (name -> phone number)

# Create the dictionary
contacts = dict(Alice="555-1234", Bob="555-5678", Carol="555-9012")

# Retrieve Bob's number
bobs_phone = contacts["Bob"]
print(bobs_phone)  # Output: 555-5678

# Add a new contact (David) and update Carol's number in a single call;
# Carol keeps her position, David is appended after her.
contacts.update(David="555-4321", Carol="555-2468")

# Delete Alice's entry
contacts.pop("Alice")

# Show the updated contacts
print(contacts)
# Output: {'Bob': '555-5678', 'Carol': '555-2468', 'David': '555-4321'}

555-5678
{'Bob': '555-5678', 'Carol': '555-2468', 'David': '555-4321'}


In [None]:
# More sets

# Common operations with a code example:
# A set of favourite programming languages
languages = {"Python", "JavaScript", "Java"}

# Add a new language
languages.add("C++")

# Adding "Python" again changes nothing: the element is already present
languages.add("Python")
print(languages)
# Output: {'Python', 'C++', 'JavaScript', 'Java'} (the order may vary)

# Remove an element
languages.discard("Java")

# A second set
web_languages = {"JavaScript", "HTML", "CSS"}

# Elements the two sets have in common
common_languages = languages & web_languages
print(common_languages)  # Output: {'JavaScript'}

{'C++', 'JavaScript', 'Python', 'Java'}
{'JavaScript'}


In [None]:
# Coding challenge: make a list

# Values provided (do not change)
array = [1, 2, 2, 3, 1, 4, 5, 3]

# Keep only the unique values: a set stores each value once, so duplicates are dropped
unique_set = {value for value in array}

# List conversion and print provided (do not change).
# list() only converts the set to a list; it does not sort the values,
# and a set does not guarantee any particular order.
unique_array = list(unique_set)
print(unique_array)

[1, 2, 3, 4, 5]


## Ejercicios de estructura de los datos: La práctica hace al maestro. TRADUCCIÓN EN APUNTES ##

## How to Interact with this Jupyter Notebook

In this activity, you will use a Jupyter Notebook, which integrates both text and code. The gray boxes contain executable code, which you will run in order to view its output. The text in between the code provides instructions.


## Scenario: The Furry Foodie

Welcome to The Furry Foodie, where tails wag and purrs rumble! 

You're the new data analyst on the block, and it's your job to ensure our inventory management system runs smoothly. You'll be working with various data structures to track product IDs, monitor stock levels, analyze sales trends, and organize our wide selection of pet products. 

### Project Summary:

* **Lists - Inventory Management:** Efficiently manage and update our inventory list using Python lists. We'll practice adding new items, removing out-of-stock products, and ensuring our inventory is always up-to-date.

* **Tuples - Immutable Data:** Explore the concept of immutability with tuples and understand their suitability for storing fixed data, such as product IDs.

* **Dictionaries - Inventory Lookup:** Utilize dictionaries to create a dynamic inventory database. We'll learn how to load inventory data from a CSV file, access and update stock levels, and handle item additions and removals.

* **Iterating over a Dictionary:** Generate clear and informative inventory reports by iterating through the dictionary and extracting key-value pairs.


For this activity, you will have access to a CSV file named `inventory.csv` that has already been created for you and contains some initial items. However, it has not yet been loaded into the Jupyter Notebook.

In the cell below, add `inventory.csv` within pd.read_csv() to load the inventory data from the CSV file. Then, run the cell, which will:
* Extract the product names from the DataFrame and store them in a list named `inventory`.
* Print the inventory list to view the data.

In [1]:
# Import a powerful tool called "pandas" that you'll use to work with and organize data easily
import pandas as pd

# Load the inventory from the CSV file into a DataFrame
inventory_df = pd.read_csv('inventory.csv')

# Extract the product names from the DataFrame's 'product_name' column and store them in a list
inventory = inventory_df['product_name'].tolist()

# Print the inventory list and inspect the output
print(inventory) 

# To view everything as a DataFrame instead, use: inventory_df

['Salmon Snacks', 'Cheesy Chompers', 'Peanut Butter Biscuits', 'Feathery Fun Sticks', 'Squeaky Sausages', 'Tuna Tidbits', 'Crunchy Carrots', 'Beef Jerky', 'Chicken Chunks', 'Duck Delights', 'Lamb Lovin', 'Turkey Tidbits', 'Fish Fingers', 'Dental Chews', 'Hip & Joint Treats', 'Skin & Coat Formula', 'Calming Chews', 'Training Treats', 'Catnip Mice', 'Feather Wand', 'Laser Pointer', 'Scratching Post', 'Cat Tree', 'Litter Box', 'Litter Scoop', 'Cat Food (dry)', 'Cat Food (wet)', 'Dog Food (dry)', 'Dog Food (wet)', 'Dog Leash', 'Dog Collar', 'Dog Harness', 'Dog Bed', 'Dog Toys', 'Cat Toys', 'Pet Shampoo', 'Pet Conditioner', 'Pet Toothbrush', 'Pet Toothpaste', 'Pet Nail Clippers', 'Pet Carrier', 'Pet Bowls', 'Pet Water Fountain', 'Pet First Aid Kit', 'Pet Grooming Brush', 'Pet Waste Bags', 'Pet Training Clicker', 'Pet Travel Bowl', 'Pet Cooling Mat', 'Pet Heating Pad']


Notice that the output above is a list of inventory items at the Furry Foodie. 

It turns out that "Cheesy Chompers" is no longer a popular choice at The Furry Foodie and you've decided to discontinue them. You need to update the inventory list to reflect this change.

The `remove()` method searches for the specified item in the list and removes its first occurrence. If the item is not found, it raises a ValueError.

In the cell below:

* Use the `remove()` method to remove the first occurrence of "Cheesy Chompers" from the `inventory` list.

Then, run the cell to print the updated inventory and see the changes.


In [2]:
# Remove the first occurrence of "Cheesy Chompers" from the inventory list.
# remove() raises ValueError when the item is not in the list, so running
# this cell a second time fails unless the inventory is reloaded first.
inventory.remove("Cheesy Chompers") 

# Show the list after the removal
print("\nUpdated inventory:")
print(inventory)


Updated inventory:
['Salmon Snacks', 'Peanut Butter Biscuits', 'Feathery Fun Sticks', 'Squeaky Sausages', 'Tuna Tidbits', 'Crunchy Carrots', 'Beef Jerky', 'Chicken Chunks', 'Duck Delights', 'Lamb Lovin', 'Turkey Tidbits', 'Fish Fingers', 'Dental Chews', 'Hip & Joint Treats', 'Skin & Coat Formula', 'Calming Chews', 'Training Treats', 'Catnip Mice', 'Feather Wand', 'Laser Pointer', 'Scratching Post', 'Cat Tree', 'Litter Box', 'Litter Scoop', 'Cat Food (dry)', 'Cat Food (wet)', 'Dog Food (dry)', 'Dog Food (wet)', 'Dog Leash', 'Dog Collar', 'Dog Harness', 'Dog Bed', 'Dog Toys', 'Cat Toys', 'Pet Shampoo', 'Pet Conditioner', 'Pet Toothbrush', 'Pet Toothpaste', 'Pet Nail Clippers', 'Pet Carrier', 'Pet Bowls', 'Pet Water Fountain', 'Pet First Aid Kit', 'Pet Grooming Brush', 'Pet Waste Bags', 'Pet Training Clicker', 'Pet Travel Bowl', 'Pet Cooling Mat', 'Pet Heating Pad']


Great work! The updated inventory list no longer shows "Cheesy Chompers", which has been successfully removed.

However, a new shipment of delicious pet treats has just arrived. 

The `new_items_str` variable in the cell below contains a comma-separated string representing these new items. The provided code converts the `new_items_str` into a list of individual items using the `split()` method. 

The `split()` method breaks a string into a list of substrings based on a specified delimiter (in this case, a comma followed by a space).

In the cell below: 
* Use the `extend()` method to add all the new items from the `new_items` list to the end of the existing inventory list by adding `new_items` to the `extend()` method. 

Lastly, run the cell to print the updated inventory and see your newly added items. 

In [3]:
print("A new shipment of gourmet goodies has arrived!")

# New shipment of goodies! (provided as a single comma-separated string)
new_items_str = "Squeaky Sausages, Tuna Tidbits, Crunchy Carrots"

# Split the string on ", " (comma followed by a space) and store the resulting list in new_items
new_items = new_items_str.split(", ") 

# Append every element of new_items to the end of the inventory list.
# extend() does not check for duplicates, so names already in the list appear twice.
inventory.extend(new_items) 

print("\nUpdated inventory:")
print(inventory) 

A new shipment of gourmet goodies has arrived!

Updated inventory:
['Salmon Snacks', 'Peanut Butter Biscuits', 'Feathery Fun Sticks', 'Squeaky Sausages', 'Tuna Tidbits', 'Crunchy Carrots', 'Beef Jerky', 'Chicken Chunks', 'Duck Delights', 'Lamb Lovin', 'Turkey Tidbits', 'Fish Fingers', 'Dental Chews', 'Hip & Joint Treats', 'Skin & Coat Formula', 'Calming Chews', 'Training Treats', 'Catnip Mice', 'Feather Wand', 'Laser Pointer', 'Scratching Post', 'Cat Tree', 'Litter Box', 'Litter Scoop', 'Cat Food (dry)', 'Cat Food (wet)', 'Dog Food (dry)', 'Dog Food (wet)', 'Dog Leash', 'Dog Collar', 'Dog Harness', 'Dog Bed', 'Dog Toys', 'Cat Toys', 'Pet Shampoo', 'Pet Conditioner', 'Pet Toothbrush', 'Pet Toothpaste', 'Pet Nail Clippers', 'Pet Carrier', 'Pet Bowls', 'Pet Water Fountain', 'Pet First Aid Kit', 'Pet Grooming Brush', 'Pet Waste Bags', 'Pet Training Clicker', 'Pet Travel Bowl', 'Pet Cooling Mat', 'Pet Heating Pad', 'Squeaky Sausages', 'Tuna Tidbits', 'Crunchy Carrots']


Great job adding those new items!

As the Furry Foodie grows, you're starting to see a lot of products with similar names. It's getting tricky to keep them organized, especially when processing orders or tracking sales. You need a way to assign unique identifiers to each product, something that won't accidentally get changed or mixed up.

You will use tuples to represent unique product IDs. Tuples are ordered, immutable collections of items.  Immutability means that once a tuple is created, its elements cannot be changed. This makes tuples ideal for storing data that shouldn't be modified.

Please note that you will run the cell below twice. 

First, run the cell now; it contains a few simple product IDs, stored as tuples, that have already been created for you. When you run the cell, you will print the product IDs. Observe the output.

Then, remove the `#` in front of the last line of code to uncomment it. This last line of code attempts to modify a tuple. 

Run the cell a second time and observe the `TypeError` that demonstrates the immutability of tuples.

In [4]:
# Create some product IDs as tuples: (product name, size)
product_id1 = ("Salmon Snacks", "Small")
product_id2 = ("Cheesy Chompers", "Medium")
product_id3 = ("Peanut Butter Biscuits", "Large")

# Print the product IDs
print("Product IDs:")
print(product_id1)
print(product_id2)
print(product_id3)

# Demonstrate immutability: assigning to a tuple element raises TypeError.
# This is the assignment the exercise asks you to try. The error is caught
# and printed here, so the message is still shown but the rest of the
# notebook keeps running when all cells are executed top to bottom.
try:
    product_id1[0] = "Tuna Treats"
except TypeError as error:
    print(f"TypeError: {error}")

Product IDs:
('Salmon Snacks', 'Small')
('Cheesy Chompers', 'Medium')
('Peanut Butter Biscuits', 'Large')


TypeError: 'tuple' object does not support item assignment

The previous sections focused on using lists and tuples for inventory management. While these data structures are useful, they have limitations when it comes to efficiently accessing and updating specific product information. 

Dictionaries offer a more organized and flexible approach, allowing you to store product names as keys and their corresponding stock levels as values. This key-value structure enables quick lookups and modifications, making dictionaries ideal for managing inventory data.

Run the cell below, which uses the pandas library to load data from the CSV file and converts it into a dictionary. Notice pandas' versatility here in handling different data structures.

In [5]:
import pandas as pd

# Load the inventory CSV and use the 'product_name' column as the index,
# so that each row is identified by its product name
inventory_df = pd.read_csv('inventory.csv').set_index('product_name')

# Turn the 'stock_level' column into a dictionary: product name -> stock level
stock_levels = inventory_df['stock_level']
inventory_dict = stock_levels.to_dict()

# Print a header followed by the inventory dictionary
print("Initial Inventory:", inventory_dict, sep="\n")

Initial Inventory:
{'Salmon Snacks': 25, 'Cheesy Chompers': 15, 'Peanut Butter Biscuits': 30, 'Feathery Fun Sticks': 34, 'Squeaky Sausages': 16, 'Tuna Tidbits': 13, 'Crunchy Carrots': 17, 'Beef Jerky': 16, 'Chicken Chunks': 46, 'Duck Delights': 23, 'Lamb Lovin': 22, 'Turkey Tidbits': 25, 'Fish Fingers': 13, 'Dental Chews': 45, 'Hip & Joint Treats': 33, 'Skin & Coat Formula': 33, 'Calming Chews': 48, 'Training Treats': 11, 'Catnip Mice': 23, 'Feather Wand': 40, 'Laser Pointer': 21, 'Scratching Post': 14, 'Cat Tree': 45, 'Litter Box': 48, 'Litter Scoop': 18, 'Cat Food (dry)': 15, 'Cat Food (wet)': 24, 'Dog Food (dry)': 28, 'Dog Food (wet)': 15, 'Dog Leash': 23, 'Dog Collar': 23, 'Dog Harness': 40, 'Dog Bed': 30, 'Dog Toys': 41, 'Cat Toys': 40, 'Pet Shampoo': 35, 'Pet Conditioner': 27, 'Pet Toothbrush': 38, 'Pet Toothpaste': 24, 'Pet Nail Clippers': 30, 'Pet Carrier': 30, 'Pet Bowls': 27, 'Pet Water Fountain': 12, 'Pet First Aid Kit': 40, 'Pet Grooming Brush': 48, 'Pet Waste Bags': 45, 'P

Notice that the output above is now a dictionary where the product names are the keys, and the stock levels are the corresponding values. 

For example, "Salmon Snacks" is the key and 25 (an integer) is the value.

This structure allows for efficient lookup and management of inventory information. 

Imagine that a new shipment of exciting pet products has arrived at The Furry Foodie and you need to update your `inventory_dict` dictionary to reflect these changes.

In the cell below: 

* Add a new item, "Puppy Snacks", to the `inventory_dict` with a stock level of 40.
* We also received additional stock of "Cheesy Chompers". Update the stock level of "Cheesy Chompers" in the `inventory_dict` to 20.

Then, run the cell to print the updated `inventory_dict` and see the changes.

In [6]:
# Add "Puppy Snacks" to the inventory: assigning to a key that does not exist yet creates the entry
inventory_dict["Puppy Snacks"] = 40

# Update "Cheesy Chompers": assigning to an existing key overwrites its value, here with 20
inventory_dict["Cheesy Chompers"] = 20

# Show the dictionary after both changes
print("\nUpdated Inventory:")
print(inventory_dict)


Updated Inventory:
{'Salmon Snacks': 25, 'Cheesy Chompers': 20, 'Peanut Butter Biscuits': 30, 'Feathery Fun Sticks': 34, 'Squeaky Sausages': 16, 'Tuna Tidbits': 13, 'Crunchy Carrots': 17, 'Beef Jerky': 16, 'Chicken Chunks': 46, 'Duck Delights': 23, 'Lamb Lovin': 22, 'Turkey Tidbits': 25, 'Fish Fingers': 13, 'Dental Chews': 45, 'Hip & Joint Treats': 33, 'Skin & Coat Formula': 33, 'Calming Chews': 48, 'Training Treats': 11, 'Catnip Mice': 23, 'Feather Wand': 40, 'Laser Pointer': 21, 'Scratching Post': 14, 'Cat Tree': 45, 'Litter Box': 48, 'Litter Scoop': 18, 'Cat Food (dry)': 15, 'Cat Food (wet)': 24, 'Dog Food (dry)': 28, 'Dog Food (wet)': 15, 'Dog Leash': 23, 'Dog Collar': 23, 'Dog Harness': 40, 'Dog Bed': 30, 'Dog Toys': 41, 'Cat Toys': 40, 'Pet Shampoo': 35, 'Pet Conditioner': 27, 'Pet Toothbrush': 38, 'Pet Toothpaste': 24, 'Pet Nail Clippers': 30, 'Pet Carrier': 30, 'Pet Bowls': 27, 'Pet Water Fountain': 12, 'Pet First Aid Kit': 40, 'Pet Grooming Brush': 48, 'Pet Waste Bags': 45, '

Notice the output above. Puppy Snacks with a stock level of 40 has been added to your dictionary! The stock level value of Cheesy Chompers has also been updated to 20! Great work.

Unfortunately, you hear that "Peanut Butter Biscuits" has been discontinued at The Furry Foodie.

You need to remove this item from your `inventory_dict` dictionary to keep the records up-to-date.

In the cell below: 

* Use the `del` keyword to remove the "Peanut Butter Biscuits" entry from the `inventory_dict` dictionary. Add `del` to the front of the `inventory_dict` variable and fill in "Peanut Butter Biscuits".

Then, run the cell to print the updated `inventory_dict` and see the changes.

In [7]:
# We've discontinued "Peanut Butter Biscuits". Remove it from the inventory

# Remove the entry with the del keyword.
# del raises KeyError when the key is absent, so running this cell a second
# time fails unless the dictionary is rebuilt first.
del inventory_dict["Peanut Butter Biscuits"] 

# Show the dictionary after the removal
print("\nUpdated Inventory after Discontinuation:")
print(inventory_dict)


Updated Inventory after Discontinuation:
{'Salmon Snacks': 25, 'Cheesy Chompers': 20, 'Feathery Fun Sticks': 34, 'Squeaky Sausages': 16, 'Tuna Tidbits': 13, 'Crunchy Carrots': 17, 'Beef Jerky': 16, 'Chicken Chunks': 46, 'Duck Delights': 23, 'Lamb Lovin': 22, 'Turkey Tidbits': 25, 'Fish Fingers': 13, 'Dental Chews': 45, 'Hip & Joint Treats': 33, 'Skin & Coat Formula': 33, 'Calming Chews': 48, 'Training Treats': 11, 'Catnip Mice': 23, 'Feather Wand': 40, 'Laser Pointer': 21, 'Scratching Post': 14, 'Cat Tree': 45, 'Litter Box': 48, 'Litter Scoop': 18, 'Cat Food (dry)': 15, 'Cat Food (wet)': 24, 'Dog Food (dry)': 28, 'Dog Food (wet)': 15, 'Dog Leash': 23, 'Dog Collar': 23, 'Dog Harness': 40, 'Dog Bed': 30, 'Dog Toys': 41, 'Cat Toys': 40, 'Pet Shampoo': 35, 'Pet Conditioner': 27, 'Pet Toothbrush': 38, 'Pet Toothpaste': 24, 'Pet Nail Clippers': 30, 'Pet Carrier': 30, 'Pet Bowls': 27, 'Pet Water Fountain': 12, 'Pet First Aid Kit': 40, 'Pet Grooming Brush': 48, 'Pet Waste Bags': 45, 'Pet Trai

Great work! Now, run the cell below, which will generate a nicely formatted inventory report.

Here's a breakdown of the final cell:

* Print a header: It will start by printing a clear header "Inventory Report:" to visually separate the report from other output.

* Iterate through the inventory: It uses a `for` loop to go through each item in the `inventory_dict` dictionary.  

* Extract product and stock information: For each iteration of the loop, the `items()` method of the dictionary is used to get both the `product` (key) and its corresponding `stock` level (value).

* Print formatted output: Using an f-string, it creates a nicely formatted line of text for each product, displaying its name and current stock level. This creates a clear and organized inventory report.

Run the code and see the report!


In [8]:
# It's time to generate a nicely formatted inventory report!

# Header that separates the report from earlier output
print("\nInventory Report:")
# items() yields (key, value) pairs: the product name and its stock level
for product, stock in inventory_dict.items(): 
    # One formatted line per product
    print(f"Product: {product}, Stock: {stock}") 



Inventory Report:
Product: Salmon Snacks, Stock: 25
Product: Cheesy Chompers, Stock: 20
Product: Feathery Fun Sticks, Stock: 34
Product: Squeaky Sausages, Stock: 16
Product: Tuna Tidbits, Stock: 13
Product: Crunchy Carrots, Stock: 17
Product: Beef Jerky, Stock: 16
Product: Chicken Chunks, Stock: 46
Product: Duck Delights, Stock: 23
Product: Lamb Lovin, Stock: 22
Product: Turkey Tidbits, Stock: 25
Product: Fish Fingers, Stock: 13
Product: Dental Chews, Stock: 45
Product: Hip & Joint Treats, Stock: 33
Product: Skin & Coat Formula, Stock: 33
Product: Calming Chews, Stock: 48
Product: Training Treats, Stock: 11
Product: Catnip Mice, Stock: 23
Product: Feather Wand, Stock: 40
Product: Laser Pointer, Stock: 21
Product: Scratching Post, Stock: 14
Product: Cat Tree, Stock: 45
Product: Litter Box, Stock: 48
Product: Litter Scoop, Stock: 18
Product: Cat Food (dry), Stock: 15
Product: Cat Food (wet), Stock: 24
Product: Dog Food (dry), Stock: 28
Product: Dog Food (wet), Stock: 15
Product: Dog Lea

## Project Recap: Managing Inventory at The Furry Foodie with Python

Congratulations on completing the Furry Foodie inventory management project! You've successfully applied your Python skills to keep our shelves stocked and our furry customers happy. Let's recap what you've learned:

* **Lists:** You've mastered how to use lists to represent and manage inventory. You've added new items, removed out-of-stock products, and kept everything organized, ensuring we never run out of the treats our pets love.

* **Tuples:** You've learned how to leverage the immutability of tuples to store crucial product identifiers, guaranteeing data integrity and preventing accidental modifications.

* **Dictionaries:** You've harnessed the power of dictionaries to create a flexible and efficient inventory database. You've added new products, updated stock levels, checked for item availability, and even removed discontinued items from the inventory.

* **Iterating over Dictionaries:** You've used loops to traverse dictionaries, extracting key-value pairs to generate clear and informative inventory reports, keeping both management and customers well-informed.

* **Working with External Data:** You've learned how to load and process data from a CSV file using the pandas library, enabling you to interact with real-world datasets.


## ACTIVIDAD: Encontrar lo que necesitas: TRADUCCIÓN EN APUNTES

## Scenario: Taming the Book Stacks: Sorting and Searching in Python

Imagine you're looking for a specific book in a large library.  The endless rows of shelves can be overwhelming, and finding that one title can feel like searching for a needle in a haystack.

In this Jupyter Notebook, you'll learn how to use Python to make searching a digital library much easier and more efficient. You'll explore sorting algorithms, which organize book data in a logical way, just like arranging books on a shelf. Then, you'll dive into search algorithms - these are like having your own digital librarian to help you quickly find the exact book you're looking for.


### Why Sorting and Searching Algorithms Matter:

* **Efficient Navigation:** Sorting algorithms arrange book data in a logical order, making it easier for users (or librarians!) to browse and find what they need. Think of it like arranging books alphabetically on a shelf – it saves everyone time and frustration.

* **Rapid Retrieval :** Search algorithms enable quick and precise book lookup. Whether you're searching by title, author, or other criteria, these algorithms act like a librarian's expert knowledge, pinpointing the desired book efficiently.


* **Scalability :** As libraries grow (and digital collections can grow very large), sorting and searching algorithms become even more crucial. They ensure that the library system remains responsive and user-friendly, even with thousands or millions of books.

* **Flexibility :** These algorithms aren't just for libraries. They are fundamental tools for organizing and accessing data in countless applications, from online stores to search engines. Understanding them equips you with valuable skills for working with data in any field.


## Activity Summary:

In this activity, you will:

* **Sort Book Data:** Learn how to sort a list of book dictionaries, creating a well-organized digital catalog.
* **Implement Linear Search:** Discover how to systematically search through your book collection, examining each item one by one.
* **Implement Binary Search:** Explore a more efficient search method that leverages sorted data to quickly find specific books.
* **Compare and Contrast:** Understand the tradeoffs between different search algorithms, empowering you to choose the best approach for various scenarios.


Begin by running the cell below, which imports the pandas library and loads the csv file named 'book_catalog_10.csv'.

In [1]:
# Import pandas, the data-analysis library used here to load and organize the book data
import pandas as pd

# Load the book catalog from the CSV file into a DataFrame (a table of rows and columns).
# Nothing is printed by this cell; the data is only stored in book_catalog_df.
book_catalog_df = pd.read_csv('book_catalog_10.csv')

Notice that nothing has been output from the cell after you ran it. That's because we didn't print any of the results of the csv, we only loaded it in the cell above as a **DataFrame**.

Think of a DataFrame like a super-organized table. It has rows and columns, just like a spreadsheet you might have used before. This structure makes it really easy to view, manage, and analyze our book data.  

Run the cell below, which converts the DataFrame `book_catalog_df` above into a **dictionary** and then prints the results. 

Imagine a dictionary as a collection of labeled boxes. Each box has a label (called a "key") and something stored inside (called a "value").  In our case, each "box" represents a book, and the labels are things like "title", "author", and "publication_year".

You're doing this conversion because it will make it easier to work with the book data.

In [2]:
# Convert the DataFrame to a list of dictionaries: orient='records' produces
# one dictionary per row, keyed by column name
book_catalog = book_catalog_df.to_dict(orient='records')

# Print the whole list of dictionaries
print(book_catalog)

[{'title': '1984', 'author': 'George Orwell', 'publication_year': 1949}, {'title': 'Pride and Prejudice', 'author': 'Jane Austen', 'publication_year': 1813}, {'title': 'The Catcher in the Rye', 'author': 'J.D. Salinger', 'publication_year': 1951}, {'title': 'The Great Gatsby', 'author': 'F. Scott Fitzgerald', 'publication_year': 1925}, {'title': 'To Kill a Mockingbird', 'author': 'Harper Lee', 'publication_year': 1960}, {'title': 'The Lord of the Rings', 'author': 'J.R.R. Tolkien', 'publication_year': 1954}, {'title': "Harry Potter and the Philosopher's Stone", 'author': 'J.K. Rowling', 'publication_year': 1997}, {'title': "The Hitchhiker's Guide to the Galaxy", 'author': 'Douglas Adams', 'publication_year': 1979}, {'title': 'The Da Vinci Code', 'author': 'Dan Brown', 'publication_year': 2003}, {'title': 'The Hunger Games', 'author': 'Suzanne Collins', 'publication_year': 2008}]


Great! Now that you've printed the `book_catalog`, you can see that it is organized as a list of dictionaries.  

Remember, each dictionary is like a labeled box holding information about one book. Inside each "box," you'll find pairs of labels (called "keys") and the information associated with them (called "values"). 

For example, the key "title" is paired with the value "1984", telling us the name of the book.  

Take a quick glance at the output above and see if you can spot any familiar titles or authors within those curly brackets.

Although you have the `book_catalog` printed out, it is a bit jumbled, isn't it?  Imagine trying to find a specific book in a messy library – it would take forever!

That's why you need to sort the `book_catalog`.  Just like arranging books alphabetically on a shelf, sorting makes it much easier to browse and find what we're looking for. 

The code in the cell below does exactly that! Before you run it, let's break it down:

* **Helper Functions:**  In programming, a helper function is simply a function that assists another function in completing its task.  Think of these like little librarian assistants.  They perform specific tasks to make the main job (sorting the catalog) easier.
    * `get_title(book)`:  This helper simply plucks the title out of a book's dictionary, so we can focus on just that when sorting. 
    * `sort_catalog_by_title(catalog)`: This is the main librarian in charge of sorting.  It uses a special Python trick (`catalog.sort()`) to arrange all the books in alphabetical order by their titles. 

* **Putting It All Together:**
    * We call the `sort_catalog_by_title` function to do the actual sorting. 
    * Finally, we print out the nicely organized catalog so you can see the results! 

**Ready to see it in action? Run the cell below!** 

In [3]:
# Helper function 1:
def get_title(book):
  """Return the value stored under the 'title' key of a book dictionary.

  Raises:
    KeyError: If the dictionary has no 'title' key.
  """
  return book['title']

# Helper function 2:
def sort_catalog_by_title(catalog):
  """Sort the book catalog in place, alphabetically by title, ignoring case.

  The lowercase title is used as the sort key. A plain (case-sensitive) string
  sort places every uppercase letter before every lowercase one ('Z' < 'a'),
  which is not alphabetical order and does not match the lowercase
  comparisons made by the search functions in this notebook. Binary search in
  particular needs the data ordered by the same key it compares with.

  Args:
    catalog: List of book dictionaries, each with a string 'title' value.

  Returns:
    None. The list itself is reordered.
  """
  catalog.sort(key=lambda book: get_title(book).lower())
    
# Sort the catalog (the list is reordered in place, so book_catalog itself changes)
sort_catalog_by_title(book_catalog)

# Display the sorted catalog, one book per line
for book in book_catalog:
  print(f"Title: {book['title']}, Author: {book['author']}, Publication Year: {book['publication_year']}")

Title: 1984, Author: George Orwell, Publication Year: 1949
Title: Harry Potter and the Philosopher's Stone, Author: J.K. Rowling, Publication Year: 1997
Title: Pride and Prejudice, Author: Jane Austen, Publication Year: 1813
Title: The Catcher in the Rye, Author: J.D. Salinger, Publication Year: 1951
Title: The Da Vinci Code, Author: Dan Brown, Publication Year: 2003
Title: The Great Gatsby, Author: F. Scott Fitzgerald, Publication Year: 1925
Title: The Hitchhiker's Guide to the Galaxy, Author: Douglas Adams, Publication Year: 1979
Title: The Hunger Games, Author: Suzanne Collins, Publication Year: 2008
Title: The Lord of the Rings, Author: J.R.R. Tolkien, Publication Year: 1954
Title: To Kill a Mockingbird, Author: Harper Lee, Publication Year: 1960


Look at that! Your book catalog is now sorted alphabetically by title. 

It's much easier to find a specific book now, isn't it? This is the power of sorting algorithms – bringing order to data so we can navigate it more efficiently. 

But what if you want to find a book without scanning the entire list? That's where search algorithms come in handy.  They help you pinpoint specific items within a dataset, just like a librarian helps you find a book on the shelf. 

In the next cell, you'll explore one such algorithm called **linear search**.  It's like systematically checking each book on a shelf until we find the one we're looking for. But how does checking each book on a shelf one by one translate into Python code?

Let's break down the code in the following cell before running it: 

The `search_books` function below does the following:

1. **Takes Input:** It needs two things to work:
    * The `catalog`:  This is our sorted list of book dictionaries.
    * The `query`:  This is what we're searching for – it could be a book title or an author's name. 


2. **Sets up a List for Results:** It creates an empty list called `results` to store any books that match our search. 

3. **Checks Each Book:**  It goes through each `book` in the `catalog`:
    * It converts both the `query` and the book's `title` and `author` to lowercase. This makes the search case-insensitive (so "the great gatsby" and "The Great Gatsby" are considered the same). 
    * It checks if the `query` is found within the book's `title` OR the book's `author`.
    * If there's a match, it adds that `book` to the `results` list. 


4. **Returns the Results:**  Finally, it gives us back the `results` list, which contains all the books that matched our search (or an empty list if nothing was found).

**In simpler terms, it's like going through each book on the shelf, checking if it's the one we want, and putting it aside if it is.**

**Ready to see it in action? Run the cell below!** 

In [None]:
# Helper function: Linear Search
def search_books(catalog, query):
  """Search for books by title or author using a case-insensitive linear search.

  Every book in the catalog is examined once, so the catalog does not need to
  be sorted. A book matches when the query appears anywhere inside its title
  or inside its author name (substring match).

  Args:
    catalog: Iterable of book dictionaries with string 'title' and 'author' values.
    query: Text to look for. An empty string matches every book.

  Returns:
    A list with the matching book dictionaries, in catalog order (empty if
    nothing matches).
  """
  needle = query.lower()  # lowercase the query once, not twice per book
  return [
    book for book in catalog
    # match on the title OR on the author
    if needle in book['title'].lower() or needle in book['author'].lower()
  ]

Now that we have our `search_books` function ready, let's put it to the test!

In the code cell below:

1. **Fill in the Query:**  Replace the empty quotes (`""`) in the `query` variable with the title **"The Great Gatsby"**.  This is the book we'll try to find in our catalog.

2. **Run the Code:**  Execute the cell to see if our linear search can locate the book. 

If everything works correctly, you should see the details of "The Great Gatsby" printed out! 

In [5]:
# Linear search for the title "The Great Gatsby"
query = "The Great Gatsby"
search_results = search_books(book_catalog, query)

# Report the outcome: a fallback message when nothing matched,
# otherwise one line per matching book
if not search_results:
  print("No books found matching your query.")
else:
  print("\nSearch results:")
  for book in search_results:
    print(f"Title: {book['title']}, Author: {book['author']}, Publication Year: {book['publication_year']}")


Search results:
Title: The Great Gatsby, Author: F. Scott Fitzgerald, Publication Year: 1925


Great! Now try searching for "Dune" in the query. Replace the empty quotes (`""`) in the `query` variable with the title **"Dune"** and run the cell.

Notice the output.

In [6]:
# Linear search for the title "Dune"
query = "Dune"
search_results = search_books(book_catalog, query)

# Report the outcome: a fallback message when nothing matched,
# otherwise one line per matching book
if not search_results:
  print("No books found matching your query.")
else:
  print("\nSearch results:")
  for book in search_results:
    print(f"Title: {book['title']}, Author: {book['author']}, Publication Year: {book['publication_year']}")

No books found matching your query.


Your `search_books` function is working well, but it is also versatile!  It can search for books by *either* title *or* author. 

Try searching for an author instead of a book.

1. **Change the Query:** In the code cell below, replace the existing query with `"J.K. Rowling"`. 

2. **Run the Code:** Execute the cell and observe the results.

**Think about it:** Even though we searched for an author's name, not a book title, we still got a result! Why do you think that is? Take a moment to review the `search_books` function code if you need a hint.

In [7]:
# Linear search by author name instead of by title
query = "J.K. Rowling"
search_results = search_books(book_catalog, query)

# Report the outcome: a fallback message when nothing matched,
# otherwise one line per matching book
if not search_results:
  print("No books found matching your query.")
else:
  print("\nSearch results:")
  for book in search_results:
    print(f"Title: {book['title']}, Author: {book['author']}, Publication Year: {book['publication_year']}")


Search results:
Title: Harry Potter and the Philosopher's Stone, Author: J.K. Rowling, Publication Year: 1997


Your linear search algorithm is working well, but it can be a bit slow if you have a *huge* library.  Imagine searching for a book in a massive bookstore by checking every single title one by one – that would take ages! 

Luckily, there's a faster way when our catalog is sorted: **binary search**. It's like playing a guessing game where you keep halving the possibilities until you find what you're looking for. But how does this translate into Python code?

Let's break down the code in the following cell before running it: 

Here is what the `binary_search_books` function does:

1. **Takes Input:**  Just like linear search, it needs the `catalog` and the `query` (the book title we're searching for).

2. **Sets Up Boundaries:**  It starts by marking the beginning (`low`) and the end (`high`) of the catalog.

3. **The Guessing Game:**
   * It calculates the middle point (`mid`) between `low` and `high`.
   * It checks if the book at the `mid` point is the one we're looking for.
   * If not, it compares the book's title at the `mid` point to our `query`:
     * If the `mid` point title is "less than" our `query`, we know the book we want must be in the *higher* half of the catalog, so we adjust `low` to `mid + 1`.
     * If the `mid` point title is "greater than" our `query`, we know the book we want must be in the *lower* half, so we adjust `high` to `mid - 1`.
   * It keeps repeating this process, narrowing down the search area by half each time, until it finds the book or determines it's not there.


4. **Returns the Result:**  If it finds the book, it returns the entire book dictionary. Otherwise, it returns `None` to indicate the book wasn't found.

**Think of it like this:** You're looking for a word in a dictionary. Instead of starting from the beginning, you open it somewhere in the middle. If the word you want comes *after* the words on that page, you flip to the second half of the dictionary. If it comes *before*, you flip to the first half. You keep doing this until you find the exact page!

**Ready to see it in action? Run the cell below!** 

In [8]:
def binary_search_books(catalog, query):
  """Find a book by its exact title (ignoring case) using binary search.

  Precondition: the catalog must be sorted in ascending order of the
  *lowercase* title, because that is the key the comparisons below use. With
  a case-sensitive sort ('Z' comes before 'a') the halving steps can discard
  the half that holds the book, so an existing title may be reported missing.

  Args:
    catalog: Sequence of book dictionaries with a string 'title' value.
    query: Full title to look for. It must match the whole title, not a part.

  Returns:
    The matching book dictionary, or None if no title matches. When several
    books share the same title, whichever one the search lands on is returned.
  """
  target = query.lower()  # normalize once instead of on every comparison
  low = 0
  high = len(catalog) - 1

  while low <= high:
    mid = (low + high) // 2
    title = catalog[mid]['title'].lower()
    if title == target:
      return catalog[mid]
    if title < target:
      low = mid + 1   # the target can only be in the upper half
    else:
      high = mid - 1  # the target can only be in the lower half

  return None  # Book not found

In the cells above, you used linear search to find "The Great Gatsby", but can you find it even *faster*? 

Remember, binary search is super speedy when our data is sorted. It works by repeatedly dividing the search area in half, like a high-tech guessing game.

The problem is that your `book_catalog_10.csv` dataset is much too small for you to observe the difference between these two search algorithms. In the code cells below, you will find that a much larger `big_book_catalog.csv` dataset has been loaded for you. 

It has around 271,380 rows!

1. **Run the Following Two Cells:** The cells below each already have the query set to `"The Great Gatsby"`. Each cell also uses the `time` module to measure the execution time of the `search_books` and `binary_search_books` functions by recording timestamps before and after the search and then calculating the difference. Note that the exact times will vary based on your system's power, but expect an extremely quick search with the binary search compared to the linear search. Also note, Jupyter Notebooks may "cache" search results, so running the same query twice may end up with times that appear to be zero. You can have Jupyter "Restart the kernel" to get fresh results.

2. **Observe the Output of Each Cell:** Why was the binary search faster than linear search?

**Think about it:**  

* How much faster do you think binary search would be if our catalog had 1,000,000 books?

In [10]:
# This is like using a stopwatch to see how long it takes to find the books.
# We start the timer, do the search, then stop the timer and calculate how much time passed.
import time

# Load big_book_catalog.csv, which has ~271,380 rows
big_book_catalog_df = pd.read_csv('big_book_catalog.csv', low_memory=False)

# Convert 'title' and 'author' to strings; missing values (NaN) become ''
# so that the .lower() calls inside search_books never hit a non-string
big_book_catalog_df['title'] = big_book_catalog_df['title'].fillna('').astype(str)
big_book_catalog_df['author'] = big_book_catalog_df['author'].fillna('').astype(str)

# NOTE: linear search does not need sorted data, so the list searched below is
# built from the unsorted DataFrame; sorted_df is not used by this cell's search.
sorted_df = big_book_catalog_df.sort_values(by=['title'])
big_book_catalog = big_book_catalog_df.to_dict(orient='records')

# Search for "The Great Gatsby" using Linear Search
query = "The Great Gatsby"  # Example query

# perf_counter() is a monotonic, high-resolution clock intended for measuring
# short intervals; time.time() is a wall clock whose resolution can be too
# coarse for this and may report an elapsed time of 0.0
start_time = time.perf_counter()  # Record start time

search_results = search_books(big_book_catalog, query)

end_time = time.perf_counter()    # Record end time

elapsed_time_linear = end_time - start_time
print(f"\nLinear search took {elapsed_time_linear:.5f} seconds.")

if search_results:
  print("\nSearch results:")
  for book in search_results:
    print(f"Title: {book['title']}, Author: {book['author']}, Publication Year: {book['publication_year']}")
else:
  print("No books found matching your query.")


Linear search took 0.16016 seconds.

Search results:
Title: The Great Gatsby, Author: F. Scott Fitzgerald, Publication Year: 1999
Title: The GREAT GATSBY (Scribner Classic), Author: F. Scott Fitzgerald, Publication Year: 1979
Title: The GREAT GATSBY (Scribner Classic), Author: F. Scott Fritzgerald, Publication Year: 1920
Title: The Great Gatsby, Author: F. Scott Fitzgerald, Publication Year: 1995
Title: The GREAT GATSBY (A Scribner Classic), Author: F. Scott Fitzgerald, Publication Year: 1992
Title: Notes on The Great Gatsby: Notes (York Notes), Author: Tang Soo Ping, Publication Year: 1980
Title: The Great Gatsby (World's Classics), Author: F. Scott Fitzgerald, Publication Year: 1998
Title: Der Grobe Gatsby/the Great Gatsby, Author: F. Scott Fitzgerald, Publication Year: 1994
Title: The Great Gatsby (Everyman S.), Author: F.Scott Fitzgerald, Publication Year: 1993
Title: F. Scott Fitzgerald's the Great Gatsby, Author: F. Scott Fitzgerald, Publication Year: 1976
Title: The Great Gatsb

In [None]:
# This is like using a stopwatch to see how long it takes to find the books.
# Binary search has a much better running time here because the dataset is large.
# We start the timer, do the search, then stop the timer and calculate how much time passed.
import time

# Load big_book_catalog.csv, which has ~271,380 rows
big_book_catalog_df = pd.read_csv('big_book_catalog.csv', low_memory=False)

# Convert 'title' and 'author' to strings; missing values (NaN) become ''
big_book_catalog_df['title'] = big_book_catalog_df['title'].fillna('').astype(str)
big_book_catalog_df['author'] = big_book_catalog_df['author'].fillna('').astype(str)

# Binary search requires sorted data, and binary_search_books compares
# LOWERCASE titles, so the rows must be ordered by lowercase title as well.
# A plain case-sensitive sort puts 'Z' before 'a' and can make the search
# miss titles that are actually present.
sorted_df = big_book_catalog_df.sort_values(by=['title'], key=lambda col: col.str.lower())
big_book_catalog = sorted_df.to_dict(orient='records')

# Search for "The Great Gatsby" using Binary Search
query = "The Great Gatsby"  # Example query

# perf_counter() is a monotonic, high-resolution clock intended for measuring
# short intervals; a binary search finishes too quickly for time.time()
start_time = time.perf_counter()

search_results = binary_search_books(big_book_catalog, query)

elapsed_time_binary = time.perf_counter() - start_time

print(f"\nBinary search took {elapsed_time_binary:.5f} seconds.")

# binary_search_books returns a single book dictionary, or None when not found
if search_results is not None:
  print("\nBinary Search Result for 'The Great Gatsby':")
  print(f"Title: {search_results['title']}, Author: {search_results['author']}, Publication Year: {search_results['publication_year']}")
else:
  print("Binary Search: Book not found")


Binary search took 0.00196 seconds.

Binary Search Result for 'The Great Gatsby':
Title: The Great Gatsby, Author: F. Scott Fitzgerald, Publication Year: 1995


### Observing the Search Process

While the output for both searches might look the same, the way they got there was quite different! 

* **Linear Search:**  Think back to how linear search works – it checks each book one by one until it reaches the end of the catalog. Since "Dune" isn't there, it had to go through the entire list before giving up. 

* **Binary Search:**  Binary search, on the other hand, is more strategic. It keeps dividing the catalog in half, eliminating large chunks with each step.  Even though our catalog is small, you might have noticed it didn't have to check every single book to know "Dune" wasn't there. 

**Key Takeaway:** Even with a small dataset, you can start to see the inherent difference in how these algorithms operate.  Imagine how much more pronounced this difference would be with a catalog containing thousands of books!

While the output for both searches might look the same, the way they got there was quite different! 

* **Linear Search:**  Think back to how linear search works – it checks each book one by one until it reaches the end of the catalog or finds the target book. If the target book isn't present, it has to go through the entire list before giving up. 

* **Binary Search:**  Binary search, on the other hand, is more strategic. It requires the catalog to be sorted. It keeps dividing the catalog in half, eliminating large chunks with each step.  Even with a large catalog, it only needs to check a small fraction of the books to determine if the target book is present or not. 

**Key Takeaway:** With the significantly larger dataset (`big_book_catalog.csv`), you should observe a much more pronounced difference in search times. The linear search, having to potentially check every single book, will likely take a noticeable amount of time. In contrast, the binary search, with its divide-and-conquer strategy, should be considerably faster. This efficiency gain becomes even more crucial as the size of the catalog grows.

**Think about it:**  

* If the catalog had 1,000,000 books (roughly 3.7 times the ~271,380 in the current dataset), the linear search could take about 3.7 times longer than it did here, because its running time grows in proportion to the size of the catalog. However, the binary search's time would only increase slightly due to its logarithmic time complexity. It's safe to say that binary search would be dramatically faster in such a scenario, highlighting its superiority for large datasets.

In [None]:
# Example 1: Simple sorting algorithm
# This method repeatedly finds the lowest card and sets it aside. Easy to understand, but slow for large lists.

def simple_sort(cards):
    """Return a new list with the cards in ascending order.

    The lowest remaining card is found and moved to the result, over and over,
    until no cards remain. Each round scans the remaining cards, so the total
    work grows quadratically with the number of cards.

    Args:
        cards: Iterable of mutually comparable items. It is copied first, so
            the caller's collection is left untouched.

    Returns:
        A new list containing the same items, sorted from lowest to highest.
    """
    remaining = list(cards)  # work on a copy so the caller's list is not emptied
    sorted_cards = []
    while remaining:
        lowest_card = min(remaining)
        sorted_cards.append(lowest_card)
        remaining.remove(lowest_card)  # removes only the first occurrence
    return sorted_cards

In [13]:
# Example 2: Quicksort
# Much more efficient for large lists. It picks a pivot, splits the items into smaller and larger ones, and recurses.

def quicksort(cards):
    """Return the cards sorted in ascending order using quicksort.

    The pivot is the middle element and the items are split three ways
    (smaller than, equal to, greater than the pivot). With a first-element
    pivot and a two-way split, already-sorted input or input with many
    duplicates recurses once per element and exceeds Python's recursion
    limit at around a thousand items; this version stays shallow on both.

    Args:
        cards: Sequence of mutually comparable items. It is not modified.

    Returns:
        A new sorted list. A sequence with fewer than two items is already
        sorted and is returned as-is (the same object).
    """
    if len(cards) < 2:
        return cards  # Base case: 0 or 1 item is already sorted

    pivot = cards[len(cards) // 2]  # middle element as pivot
    less = [card for card in cards if card < pivot]
    equal = [card for card in cards if card == pivot]  # includes the pivot itself
    greater = [card for card in cards if card > pivot]
    # Items equal to the pivot are already in their final position, so only
    # the strictly smaller and strictly larger groups need further sorting.
    return quicksort(less) + equal + quicksort(greater)

In [14]:
# Lookup comparison: lists vs. dictionaries
# Shows that a membership test on a dictionary is much faster than on a list, especially for large collections:

import timeit

lookup_value = 99999

# Build both containers up front; they hold the same 100000 integers
list_data = [*range(100000)]
dict_data = dict(zip(list_data, list_data))

# Time 1000 membership tests against the list, then against the dictionary
list_time = timeit.timeit(lambda: lookup_value in list_data, number=1000)
dict_time = timeit.timeit(lambda: lookup_value in dict_data, number=1000)

print("List lookup time:", list_time)
print("Dictionary lookup time:", dict_time)


List lookup time: 1.1799383000470698
Dictionary lookup time: 8.099991828203201e-05


In [None]:
# Specialized data structures in Python

# Deque (double-ended queue)
# Lets you add or remove items efficiently at both ends. Ideal for queues, stacks, simulating lines, or undo/redo functionality.


from collections import deque

# Start the queue with both tasks already lined up, oldest first
queue = deque(["task1", "task2"])
# popleft() takes the item at the front, so the first one in is the first one out
# (like a theme-park line: people join at the back and those at the front ride first)
print(queue.popleft())  # Output: task1

task1


In [4]:
# collections.Counter
# Counts how often each item occurs in an iterable. Perfect for analyzing words, counting list items, or finding the most common ones.


from collections import Counter

text = "This is a sample text with some repeated words words"
# Start from an empty counter and tally every whitespace-separated word
word_counts = Counter()
word_counts.update(text.split())
print(word_counts)
# Output: Counter({'words': 2, 'This': 1, 'is': 1, 'a': 1, 'sample': 1, 'text': 1, 'with': 1, 'some': 1, 'repeated': 1})

Counter({'words': 2, 'This': 1, 'is': 1, 'a': 1, 'sample': 1, 'text': 1, 'with': 1, 'some': 1, 'repeated': 1})
