# Análisis de Ventas - Carga inicial completada

Este notebook trabaja sobre una base de datos MySQL poblada con la información contenida en archivos CSV.

Los datos fueron cargados desde los archivos ubicados en el directorio `./data` y corresponden a distintas entidades del negocio: categorías, productos, clientes, empleados, países y ventas.

**Nota:** Este notebook **no vuelve a cargar** los datos. Solo se conecta a la base para analizarlos y resolver las consignas del proyecto.

---

## Archivos disponibles:

- `categories.csv`
- `products.csv`
- `customers.csv`
- `employees.csv`
- `countries.csv`
- `sales.csv`

## Estructura esperada en la base de datos (nombres de tablas esperados):

- `categories`
- `products`
- `customers`
- `employees`
- `countries`
- `sales`

---


In [None]:
import pandas as pd
from pathlib import Path
import utils.notebook_utils as notebook_utils

# Preview every CSV file under ./data: print its name, a five-row sample,
# and its row/column counts, followed by a separator.
data_path = Path("./data")
csv_files = data_path.glob("*.csv")

print("Summary of CSV files found:\n")

for file in csv_files:
    # The helper's return value is not used anywhere, so it is not bound to a name.
    notebook_utils.print_colored(text='File:', color='blue')
    print(f" {file.name}\n")

    try:
        # Parse only a five-row sample with pandas.
        df = pd.read_csv(file, nrows=5)
        # Count physical lines for the row total; subtract one for the header row.
        # NOTE: quoted fields containing embedded newlines would be over-counted.
        with open(file, encoding="utf-8") as f:
            total_rows = sum(1 for _ in f) - 1

        print(df.to_string(index=False))
        print(f"\n Rows: {total_rows} | Columns: {len(df.columns)}\n")
    except Exception as e:
        # Report the failure and continue with the remaining files.
        print(f" Error reading file {file.name}: {e}")

    notebook_utils.print_colored_separator()


## Received CSV data vs. records loaded into the database

In [None]:
import pandas as pd
from pathlib import Path
import utils.sql_utils as sql_utils
import utils.notebook_utils as notebook_utils

# For every CSV file under ./data, show the first rows of the table named
# after the file (file stem) followed by the first rows of the CSV itself.
data_path = Path("./data")

csv_files = data_path.glob("*.csv")

print("Summary of csv files found and database created:\n")

for file in csv_files:
    try:
        df = pd.read_csv(file, nrows=5)
        # Count physical lines for the row total; subtract one for the header row.
        # The context manager closes the handle instead of leaking it.
        with open(file, encoding="utf-8") as f:
            total_rows = sum(1 for _ in f) - 1
        total_columns = len(df.columns)

        # Each CSV is expected to map to a table named after the file stem.
        table_name = file.stem
        notebook_utils.print_colored(text='Table name:', color='blue')
        print(f" {table_name}\n")

        print(f"First 5 records from table {table_name}:\n")
        # Query the table that matches this file (previously always `sales`)
        # and actually show the result under the header printed above.
        # NOTE(review): assumes run_query returns the rows rather than
        # printing them itself - confirm against utils.sql_utils.
        print(sql_utils.run_query(f"SELECT * FROM {table_name} LIMIT 5;"))

        notebook_utils.print_colored(text='File:', color='blue')
        print(f" {file.name}\n")
        print(df.to_string(index=False))
        print(f"\n Rows: {total_rows} || Columns : {total_columns}\n")

    except Exception as e:
        # Report the failure and continue with the remaining files.
        print(f" Error while reading file {file.name}: {e}")


## Verifying that the first five records match

In [None]:
import pandas as pd
from pathlib import Path
from utils.sql_utils import run_query
import utils.notebook_utils as notebook_utils

# For every CSV file under ./data, compare it with the table named after the
# file stem: row counts, column names, and (when columns match) a five-row
# sample from each side.
data_path = Path("./data")
csv_files = data_path.glob("*.csv")

print("Checking data loaded from csv files and what was loaded on db. \n")

for file in csv_files:
    try:
        table_name = file.stem

        df_csv_sample = pd.read_csv(file, nrows=5)
        # Count physical lines for the row total; subtract one for the header row.
        # The context manager closes the handle instead of leaking it.
        with open(file, encoding="utf-8") as f:
            total_csv_rows = sum(1 for _ in f) - 1
        csv_columns = list(df_csv_sample.columns)

        db_sample = run_query(f"SELECT * FROM {table_name} LIMIT 5;")
        result = run_query(f"SELECT COUNT(*) as total FROM {table_name};")

        total_db_rows = result.iloc[0]['total']

        notebook_utils.print_colored(text='Filename -> table_name', color='blue', tag='p')
        print(f" {file.name} -> {table_name}  \n")

        print(f" CSV: {total_csv_rows} rows | DB: {total_db_rows} rows \n")

        # Column comparison: names and order must match exactly.
        db_columns = list(db_sample.columns)
        if csv_columns != db_columns:
            notebook_utils.print_colored(text='Columnas no coinciden: ', color='red', tag='p')
            print(f"CSV: {csv_columns}")
            print(f"DB : {db_columns}")
        else:
            notebook_utils.print_colored(text='Columnas coinciden: ', color='green', tag='p')

            notebook_utils.print_colored(text='first 5 rows (CSV) ', color='blue', tag='p', weight='normal')
            print(df_csv_sample.to_string(index=False))

            notebook_utils.print_colored(text='first 5 rows (DB) ', color='blue', weight='normal')

            # Reuse the sample fetched above. The previous code re-ran the
            # query through `sql_utils`, a name this cell never imports.
            print(db_sample)

    except Exception as e:
        # Report the failure and continue with the remaining files.
        print(f" Error al procesar {file.name}: {e}")

    notebook_utils.print_colored_separator()
