In [1]:
import pandas as pd

df = pd.read_csv("ventas.csv", encoding="latin1")

print("\nColumnas REALES del CSV:")
print(df.columns.tolist())

display(df.head())



Columnas REALES del CSV:
['Fecha', 'Producto', 'Tipo_Producto', 'Cantidad', 'Precio_Unitario', 'Ciudad', 'Pais', 'Tipo_Venta', 'Tipo_Cliente', 'Descuento', 'Costo_Envio']


Unnamed: 0,Fecha,Producto,Tipo_Producto,Cantidad,Precio_Unitario,Ciudad,Pais,Tipo_Venta,Tipo_Cliente,Descuento,Costo_Envio
0,Santiago,2025-10-30,Arepa,Abarrotes,2.0,3681.0,Online,Minorista,0.2,0.0,5889.0
1,CÃ³rdoba,2025-11-17,Arepa,Abarrotes,7.0,2321.0,Distribuidor,Gobierno,0.15,0.0,13809.0
2,Barranquilla,2025-10-22,Leche,LÃ¡cteo,9.0,3540.0,Distribuidor,Gobierno,0.2,0.0,25488.0
3,New York,2025-10-20,Cereal,LÃ¡cteo,3.0,3287.0,Tienda_FÃ­sica,Gobierno,0.05,0.0,9367.0
4,Madrid,2025-10-20,Leche,Hogar,2.0,3414.0,Distribuidor,Mayorista,0.0,0.0,6828.0



# HU1 — Data Connection and Upload to PostgreSQL


# HU1 — Data Loading and PostgreSQL Integration

This notebook demonstrates the process of loading sales data and preparing it for analysis.  
Steps include:

1. **Import Libraries:** Load necessary Python packages.
2. **Load Base CSV:** Read the main sales CSV file.
3. **Create Derived Tables:** Generate product and customer tables.
4. **Connect to PostgreSQL:** Establish connection using SQLAlchemy.
5. **Upload Tables:** Save the main and derived tables to the database.
6. **Backup CSVs:** Save CSV backups in a local folder.
7. **Verification:** Check row counts in PostgreSQL to confirm successful upload.

Author: Daniela MQ
Date: 28/11/2024  
Database: RiwiVentas


In [2]:


# ------------------------------------------------------------
# 1. Import Libraries
# ------------------------------------------------------------
import pandas as pd
from sqlalchemy import create_engine
from IPython.display import display
import os

print("Libraries imported successfully.")

# ------------------------------------------------------------
# 2. Load Base CSV (Sales Data)
# ------------------------------------------------------------
columns = [
    'city', 'date', 'product', 'product_type', 'quantity',
    'unit_price', 'sale_type', 'customer_type', 'discount',
    'shipping_cost', 'total_sales'
]

try:
    df_sales = pd.read_csv("ventas.csv", names=columns, header=0)
    print("\nCSV file loaded successfully.")
    display(df_sales.head())
except FileNotFoundError:
    print("Error: CSV file not found.")
    raise

# ------------------------------------------------------------
# 3. Create Derived Tables (Products and Customers)
# ------------------------------------------------------------
df_products = df_sales[['product', 'product_type', 'unit_price']].drop_duplicates()
df_customers = df_sales[['customer_type']].drop_duplicates()

print("\nDerived tables created:")
display(df_products.head())
display(df_customers.head())

# ------------------------------------------------------------
# 4. Connect to PostgreSQL
# ------------------------------------------------------------
DATABASE_URL = "postgresql+psycopg2://postgres:123@localhost:5432/Riwiventas"

try:
    engine = create_engine(DATABASE_URL)
    conn = engine.connect()
    print("\nConnection to PostgreSQL successful.")
    conn.close()
except Exception as e:
    print("Connection error:", e)
    raise

# ------------------------------------------------------------
# 5. Upload Tables to PostgreSQL
# ------------------------------------------------------------
print("\nUploading tables to PostgreSQL...")

df_sales.to_sql("sales", engine, if_exists="replace", index=False)
df_products.to_sql("products", engine, if_exists="replace", index=False)
df_customers.to_sql("customers", engine, if_exists="replace", index=False)

print("Tables uploaded successfully.")

# ------------------------------------------------------------
# 6. Save CSV Backups
# ------------------------------------------------------------
os.makedirs("backups", exist_ok=True)

df_sales.to_csv("backups/sales.csv", index=False)
df_products.to_csv("backups/products.csv", index=False)
df_customers.to_csv("backups/customers.csv", index=False)

print("\nBackups saved in 'backups' folder.")

# ------------------------------------------------------------
# 7. Verify Upload in PostgreSQL
# ------------------------------------------------------------
print("\nVerifying data upload...")

with engine.connect() as conn:
    sales_count = pd.read_sql("SELECT COUNT(*) FROM sales", conn)
    products_count = pd.read_sql("SELECT COUNT(*) FROM products", conn)
    customers_count = pd.read_sql("SELECT COUNT(*) FROM customers", conn)

display(sales_count)
display(products_count)
display(customers_count)

print("\nVerification completed: Data uploaded successfully.")

# ------------------------------------------------------------
# 8. Completion Message
# ------------------------------------------------------------
print("HU1 COMPLETED")


Libraries imported successfully.

CSV file loaded successfully.


Unnamed: 0,city,date,product,product_type,quantity,unit_price,sale_type,customer_type,discount,shipping_cost,total_sales
0,Santiago,2025-10-30,Arepa,Abarrotes,2.0,3681.0,Online,Minorista,0.2,0.0,5889.0
1,Córdoba,2025-11-17,Arepa,Abarrotes,7.0,2321.0,Distribuidor,Gobierno,0.15,0.0,13809.0
2,Barranquilla,2025-10-22,Leche,Lácteo,9.0,3540.0,Distribuidor,Gobierno,0.2,0.0,25488.0
3,New York,2025-10-20,Cereal,Lácteo,3.0,3287.0,Tienda_Física,Gobierno,0.05,0.0,9367.0
4,Madrid,2025-10-20,Leche,Hogar,2.0,3414.0,Distribuidor,Mayorista,0.0,0.0,6828.0



Derived tables created:


Unnamed: 0,product,product_type,unit_price
0,Arepa,Abarrotes,3681.0
1,Arepa,Abarrotes,2321.0
2,Leche,Lácteo,3540.0
3,Cereal,Lácteo,3287.0
4,Leche,Hogar,3414.0


Unnamed: 0,customer_type
0,Minorista
1,Gobierno
4,Mayorista
7,Corporativo
16,



Connection to PostgreSQL successful.

Uploading tables to PostgreSQL...
Tables uploaded successfully.

Backups saved in 'backups' folder.

Verifying data upload...


Unnamed: 0,count
0,1250000


Unnamed: 0,count
0,289286


Unnamed: 0,count
0,25



Verification completed: Data uploaded successfully.
HU1 COMPLETED
