In [1]:
from modules.read import Read
from modules.upload import Upload
from modules.enriched import Enriched
import pandas as pd

In [2]:
# Pipeline helpers used throughout the notebook: the S3 reader (`extract`),
# the S3 Parquet uploader (`load`) and the enrichment/KPI transformer (`transform`).
extract, load, transform = Read(), Upload(), Enriched()

# Lectura S3: Capa Landing

In [3]:
# Landing-layer location, defined once so the bucket and prefix cannot drift
# between the six reads below.
BUCKET = 'retail-multisource-pipeline'
LANDING_PREFIX = 'landing'

# Read each raw landing CSV from S3; one object per source table.
categories = extract.s3_read(BUCKET, f'{LANDING_PREFIX}/categories.csv')
customers = extract.s3_read(BUCKET, f'{LANDING_PREFIX}/customers.csv')
departments = extract.s3_read(BUCKET, f'{LANDING_PREFIX}/departments.csv')
order_items = extract.s3_read(BUCKET, f'{LANDING_PREFIX}/order_items.csv')
orders = extract.s3_read(BUCKET, f'{LANDING_PREFIX}/orders.csv')
products = extract.s3_read(BUCKET, f'{LANDING_PREFIX}/products.csv')

# Transformación 

## Enriquecimientos

### Categories

In [4]:
# Combine categories with departments; the preview below shows department_id /
# department_name next to the category columns.
# NOTE(review): department_id renders as a float (2.0) in the preview — possibly
# unmatched rows or a dtype mismatch in the join; confirm in the transformer.
enriched_categories = transform.enriched_categories(categories, departments)

In [5]:
# Preview the first rows of the enriched categories.
enriched_categories.head()

Unnamed: 0,category_id,category_department_id,category_name,_id,department_id,department_name
0,1,2,football,68958d94ef1d1c6340e365a3,2.0,Fitness
1,2,2,soccer,68958d94ef1d1c6340e365a3,2.0,Fitness
2,3,2,baseball & softball,68958d94ef1d1c6340e365a3,2.0,Fitness
3,4,2,basketball,68958d94ef1d1c6340e365a3,2.0,Fitness
4,5,2,lacrosse,68958d94ef1d1c6340e365a3,2.0,Fitness


### Customers

In [6]:
# Enrich the customers table; the preview below shows an added
# customer_fullname column alongside the original customer fields.
enriched_customers = transform.enriched_customers(customers)

In [7]:
# Preview the first rows of the enriched customers.
# NOTE(review): this output renders customer names and street addresses —
# consider clearing the cell output before sharing or committing the notebook.
enriched_customers.head()

Unnamed: 0,customer_id,customer_fname,customer_lname,customer_email,customer_password,customer_street,customer_city,customer_state,customer_zipcode,customer_fullname
0,1,Richard,Hernandez,XXXXXXXXX,XXXXXXXXX,6303 Heather Plaza,Brownsville,TX,78521,Richard Hernandez
1,2,Mary,Barrett,XXXXXXXXX,XXXXXXXXX,9526 Noble Embers Ridge,Littleton,CO,80126,Mary Barrett
2,3,Ann,Smith,XXXXXXXXX,XXXXXXXXX,3422 Blue Pioneer Bend,Caguas,PR,725,Ann Smith
3,4,Mary,Jones,XXXXXXXXX,XXXXXXXXX,8324 Little Common,San Marcos,CA,92069,Mary Jones
4,5,Robert,Hudson,XXXXXXXXX,XXXXXXXXX,10 Crystal River Mall,Caguas,PR,725,Robert Hudson


### Products

In [8]:
# Enrich products using the already-enriched categories; the preview below shows
# category/department columns plus a product_image_valid flag.
# NOTE(review): category_id / department_id render as floats (2.0) in the
# preview — confirm whether the join leaves unmatched products.
enriched_products = transform.enriched_products(products, enriched_categories)

In [9]:
# Preview the first rows of the enriched products.
enriched_products.head()

Unnamed: 0,product_id,product_category_id,product_name,product_description,product_price,product_image,category_id,category_department_id,category_name,_id,department_id,department_name,product_image_valid
0,1,2,Quest Q64 10 FT. x 10 FT. Slant Leg Instant U,,59.98,http://images.acmesports.sports/Quest+Q64+10+F...,2.0,2.0,soccer,68958d94ef1d1c6340e365a3,2.0,Fitness,True
1,2,2,Under Armour Men's Highlight MC Football Clea,,129.99,http://images.acmesports.sports/Under+Armour+M...,2.0,2.0,soccer,68958d94ef1d1c6340e365a3,2.0,Fitness,True
2,3,2,Under Armour Men's Renegade D Mid Football Cl,,89.99,http://images.acmesports.sports/Under+Armour+M...,2.0,2.0,soccer,68958d94ef1d1c6340e365a3,2.0,Fitness,True
3,4,2,Under Armour Men's Renegade D Mid Football Cl,,89.99,http://images.acmesports.sports/Under+Armour+M...,2.0,2.0,soccer,68958d94ef1d1c6340e365a3,2.0,Fitness,True
4,5,2,Riddell Youth Revolution Speed Custom Footbal,,199.99,http://images.acmesports.sports/Riddell+Youth+...,2.0,2.0,soccer,68958d94ef1d1c6340e365a3,2.0,Fitness,True


### Order_items

In [10]:
# Enrich order items with the enriched product attributes.
# NOTE(review): the preview shows _id_x / _id_y columns, which suggests both
# inputs carry an `_id` column that collides in the join — consider dropping or
# renaming it before merging.
enriched_order_items = transform.enriched_order_items(order_items, enriched_products)

In [11]:
# Preview the first rows of the enriched order items (wide frame; the rendered
# output elides the middle columns with "...").
enriched_order_items.head()

Unnamed: 0,_id_x,order_item_id,order_item_order_id,order_item_product_id,order_item_quantity,order_item_subtotal,order_item_product_price,order_item_total,product_id,product_category_id,...,product_description,product_price,product_image,category_id,category_department_id,category_name,_id_y,department_id,department_name,product_image_valid
0,68958d95ef1d1c6340e365bf,22,9,1073,1,199.99,199.99,199.99,1073,48,...,,199.99,http://images.acmesports.sports/Pelican+Sunstr...,48.0,7.0,water sports,68958d94ef1d1c6340e365a8,7.0,Fan Shop,True
1,68958d95ef1d1c6340e365d0,39,13,276,4,127.96,31.99,127.96,276,13,...,,31.99,http://images.acmesports.sports/Under+Armour+W...,13.0,3.0,electronics,68958d94ef1d1c6340e365a4,3.0,Footwear,True
2,68958d95ef1d1c6340e365da,49,16,365,5,299.95,59.99,299.95,365,17,...,,59.99,http://images.acmesports.sports/Perfect+Fitnes...,17.0,4.0,cleats,68958d94ef1d1c6340e365a5,4.0,Apparel,True
3,68958d95ef1d1c6340e365fa,81,28,191,1,99.99,99.99,99.99,191,9,...,,99.99,http://images.acmesports.sports/Nike+Men%27s+F...,9.0,3.0,cardio equipment,68958d94ef1d1c6340e365a4,3.0,Footwear,True
4,68958d95ef1d1c6340e36603,90,33,403,1,129.99,129.99,129.99,403,18,...,,129.99,http://images.acmesports.sports/Nike+Men%27s+C...,18.0,4.0,men's footwear,68958d94ef1d1c6340e365a5,4.0,Apparel,True


### Orders

In [12]:
# Build the enriched orders from orders, enriched order items and enriched
# customers; the preview shows order_month, order_year and order_total plus the
# customer columns.
# NOTE(review): order_total is empty for order_id 3 in the preview — presumably
# that order has no matching order items; confirm how the KPIs should treat it.
enriched_orders = transform.enriched_orders(orders, enriched_order_items, enriched_customers)

In [13]:
# Preview the first rows of the enriched orders.
# NOTE(review): this output renders customer names and street addresses —
# consider clearing the cell output before sharing or committing the notebook.
enriched_orders.head()

Unnamed: 0,order_id,order_date,order_customer_id,order_status,order_month,order_year,order_total,customer_id,customer_fname,customer_lname,customer_email,customer_password,customer_street,customer_city,customer_state,customer_zipcode,customer_fullname
0,1,2013-07-25,11599,CLOSED,7,2013,299.98,11599,Mary,Malone,XXXXXXXXX,XXXXXXXXX,8708 Indian Horse Highway,Hickory,NC,28601,Mary Malone
1,2,2013-07-25,256,PENDING_PAYMENT,7,2013,579.98,256,David,Rodriguez,XXXXXXXXX,XXXXXXXXX,7605 Tawny Horse Falls,Chicago,IL,60625,David Rodriguez
2,3,2013-07-25,12111,COMPLETE,7,2013,,12111,Amber,Franco,XXXXXXXXX,XXXXXXXXX,8766 Clear Prairie Line,Santa Cruz,CA,95060,Amber Franco
3,4,2013-07-25,8827,CLOSED,7,2013,699.85,8827,Brian,Wilson,XXXXXXXXX,XXXXXXXXX,8396 High Corners,San Antonio,TX,78240,Brian Wilson
4,5,2013-07-25,11318,COMPLETE,7,2013,1129.86,11318,Mary,Henry,XXXXXXXXX,XXXXXXXXX,3047 Silent Embers Maze,Caguas,PR,725,Mary Henry


## Carga Enriquecimientos: Capa Processed

In [14]:
# Write the enriched categories to the processed/enriched prefix of the bucket.
load.upload_amazon_s3_parquet(
    enriched_categories,
    'retail-multisource-pipeline',
    'processed/enriched/enriched_categories/enriched_categories.parquet',
)

In [15]:
# Write the enriched customers to the processed/enriched prefix of the bucket.
# NOTE(review): the frame still carries customer_email / customer_password
# columns (masked in the preview) — confirm they belong in the processed layer.
load.upload_amazon_s3_parquet(
    enriched_customers,
    'retail-multisource-pipeline',
    'processed/enriched/enriched_customers/enriched_customers.parquet',
)

In [16]:
# Write the enriched products to the processed/enriched prefix of the bucket.
load.upload_amazon_s3_parquet(
    enriched_products,
    'retail-multisource-pipeline',
    'processed/enriched/enriched_products/enriched_products.parquet',
)

In [17]:
# Write the enriched order items to the processed/enriched prefix of the bucket.
load.upload_amazon_s3_parquet(
    enriched_order_items,
    'retail-multisource-pipeline',
    'processed/enriched/enriched_order_items/enriched_order_items.parquet',
)

In [18]:
# Write the enriched orders to the processed/enriched prefix of the bucket.
load.upload_amazon_s3_parquet(
    enriched_orders,
    'retail-multisource-pipeline',
    'processed/enriched/enriched_orders/enriched_orders.parquet',
)

## KPIs

### Ventas totales por mes


In [20]:
# Monthly sales KPI computed from the enriched orders; the preview shows one
# order_total per (order_year, order_month).
kpi_ventas_mes = transform.kpi_ventas_mes(enriched_orders)

In [21]:
# Preview the first rows of the monthly sales KPI.
kpi_ventas_mes.head()

Unnamed: 0,order_year,order_month,order_total
0,2013,7,764782.19
1,2013,8,2828658.7
2,2013,9,2934527.27
3,2013,10,2624600.61
4,2013,11,3168656.03


### Top 5 productos más vendidos

In [22]:
# Best-selling products KPI computed from the enriched order items; the preview
# shows product_name with order_item_quantity in descending order.
kpi_top_productos = transform.kpi_top_productos(enriched_order_items)

In [23]:
# Show the first five rows of the ranking.
# NOTE(review): whether the KPI frame itself is limited to five products cannot
# be told from here — confirm in the transformer.
kpi_top_productos.head()

Unnamed: 0,product_name,order_item_quantity
60,Perfect Fitness Perfect Rip Deck,73698
48,Nike Men's Dri-FIT Victory Golf Polo,62956
56,O'Brien Men's Neoprene Life Vest,57803
50,Nike Men's Free 5.0+ Running Shoe,36680
85,Under Armour Girls' Toddler Spine Surge Runni,31735


### Ticket promedio

In [24]:
# Average ticket KPI computed from the enriched orders; the preview shows a
# single ticket_promedio value.
ticket_promedio = transform.kpi_ticket_promedio(enriched_orders)

In [25]:
# Display the average ticket KPI (a one-row result in the output below).
ticket_promedio.head()

Unnamed: 0,ticket_promedio
0,597.632288


### Clientes recurrentes

In [26]:
# Recurring customers KPI computed from the enriched orders.
# NOTE(review): in the preview the order_id column holds small values (4, 7, 6)
# that look like per-customer order counts rather than ids — confirm, and
# consider renaming the column in the transformer.
clientes_recurrentes = transform.kpi_clientes_recurrentes(enriched_orders)

In [27]:
# Preview the first rows of the recurring customers KPI.
clientes_recurrentes.head()

Unnamed: 0,customer_id,order_id
1,2,4
2,3,7
3,4,6
4,5,4
5,6,4


### Ventas por estado

In [28]:
# Sales by state KPI computed from the enriched orders; the preview shows one
# order_total per customer_state.
ventas_estado = transform.kpi_ventas_estado(enriched_orders)

In [29]:
# Preview the first rows of the sales-by-state KPI.
ventas_estado.head()

Unnamed: 0,customer_state,order_total
0,AL,6401.4
1,AR,29241.61
2,AZ,566459.29
3,CA,5542723.0
4,CO,358310.6


## Carga KPIs: Capa Processed

In [30]:
# Write the monthly sales KPI to the processed/kpis prefix.
# Fix: the cell previously uploaded `enriched_categories` under this key, so the
# KPI file did not contain the KPI.
load.upload_amazon_s3_parquet(
    kpi_ventas_mes,
    'retail-multisource-pipeline',
    'processed/kpis/kpi_ventas_mes/kpi_ventas_mes.parquet',
)

In [31]:
# Write the best-selling products KPI to the processed/kpis prefix.
# Fix: the cell previously uploaded `enriched_customers` under this key, so the
# KPI file did not contain the KPI (and held customer data instead).
load.upload_amazon_s3_parquet(
    kpi_top_productos,
    'retail-multisource-pipeline',
    'processed/kpis/kpi_top_productos/kpi_top_productos.parquet',
)

In [32]:
# Write the average ticket KPI to the processed/kpis prefix.
# Fix: the cell previously uploaded `enriched_products` under this key, so the
# KPI file did not contain the KPI.
load.upload_amazon_s3_parquet(
    ticket_promedio,
    'retail-multisource-pipeline',
    'processed/kpis/kpi_ticket_promedio/kpi_ticket_promedio.parquet',
)

In [33]:
# Write the recurring customers KPI to the processed/kpis prefix.
# Fix: the cell previously uploaded `enriched_order_items` under this key, so
# the KPI file did not contain the KPI.
load.upload_amazon_s3_parquet(
    clientes_recurrentes,
    'retail-multisource-pipeline',
    'processed/kpis/kpi_clientes_recurrentes/kpi_clientes_recurrentes.parquet',
)

In [34]:
# Write the sales-by-state KPI to the processed/kpis prefix.
# Fix: the cell previously uploaded `enriched_orders` under this key, so the
# KPI file did not contain the KPI (and held customer data instead).
load.upload_amazon_s3_parquet(
    ventas_estado,
    'retail-multisource-pipeline',
    'processed/kpis/kpi_ventas_estado/kpi_ventas_estado.parquet',
)