In [29]:
import pandas as pd
import json
import random
import time
import hashlib
import os
import uuid

# E-commerce data streaming simulator

#### What to do when there's no data to work with? Just produce your own!

#### Goal:

Create a computers and video games e-commerce store and simulate purchases in order to build a data frame.

1. Choose 3 shoe brands
2. Choose 3 clothing brands
3. Prices of clothes and shoes for every brand, with their commission
4. The most popular cities in your country
5. Payment type
6. Marketing (social media, Organic, Publicity)
7. Orders
8. Stores with their coords (lat/lng)


#### Creating data


In [30]:
# Cities in which purchases are simulated; each one has an entry in
# STORE_COORDS_BY_CITY below.
CITIES = ['Ciudad de México', 'Guadalajara', 'Queretaro', 'Monterrey', 'Puebla']

# Origin of a purchase. get_payment_method treats 'ORGANIC' as an in-store sale
# and anything else as an online sale.
SOURCE_PURCHASE = ['ONLINE', 'ORGANIC']

# Payment options offered for online orders and for in-store orders.
PAYMENT_ONLINE = ['Debit', 'Credit']
PAYMENT_STORE = ['Cash', 'Debit', 'Credit']

# Marketing channels.
MARKETING = ['Social media', 'Publicity', 'Organic']

# Possible outcomes of a purchase attempt. 'COMPLETED' appears four times out of
# eight so that a uniform random.choice returns it half of the time.
STATUS_PURCHASED = [
    'COMPLETED', 'REJECTED', 'INSUFFICIENT_FUNDS', 'FAILED_API', 'FRAUD',
] + ['COMPLETED'] * 3

# (latitude, longitude) pairs of the stores, grouped by city.
# NOTE(review): the last two 'Ciudad de México' entries are identical, which makes
# that location twice as likely to be drawn — confirm whether this is intentional.
STORE_COORDS_BY_CITY = {
    'Ciudad de México': [
        (19.372879, -99.049378),
        (19.428502, -99.162914),
        (19.355778, -99.153214),
        (19.355778, -99.153214),
    ],
    'Guadalajara': [
        (20.690072, -103.301842),
        (20.670411, -103.354498),
        (20.693754, -103.381888),
    ],
    'Queretaro': [
        (20.606305, -100.412364),
        (20.623607, -100.440612),
        (20.655995, -100.399978),
    ],
    'Monterrey': [
        (25.713272, -100.277447),
        (25.732508, -100.234559),
        (25.715347, -100.344189),
        (25.744479, -100.409122),
    ],
    'Puebla': [
        (18.973534, -98.252895),
        (18.971747, -98.215115),
        (19.016400, -98.183632),
    ],
}


In [31]:
def get_payment_method(source:str):
    """Draw the payment details for a purchase coming from `source`.

    Args:
        source: Purchase origin. 'ORGANIC' is handled as an in-store sale;
            every other value is handled as an online sale.

    Returns:
        A ``(payment, status, order_type)`` tuple. In-store sales draw the
        payment from PAYMENT_STORE and are always 'COMPLETED' / 'STORE'.
        Online sales draw the payment from PAYMENT_ONLINE, the status from
        STATUS_PURCHASED, and are tagged 'ONLINE'.
    """
    if source != 'ORGANIC':
        # Online: the payment is drawn first, then the outcome of the attempt.
        method = random.choice(PAYMENT_ONLINE)
        outcome = random.choice(STATUS_PURCHASED)
        return method, outcome, 'ONLINE'

    # In-store purchases never fail in this simulation.
    return random.choice(PAYMENT_STORE), 'COMPLETED', 'STORE'

In [32]:
# Quick check: pick a random purchase source and display the resulting
# (payment, status, order_type) tuple.
get_payment_method(random.choice(SOURCE_PURCHASE))

('Credit', 'COMPLETED', 'ONLINE')

In [33]:
def get_store_coords(city:str):
    """Return the coordinates of one randomly chosen store in `city`.

    Args:
        city: A key of STORE_COORDS_BY_CITY; an unknown city raises KeyError.

    Returns:
        One of the entries listed for that city in STORE_COORDS_BY_CITY.
    """
    stores_in_city = STORE_COORDS_BY_CITY[city]
    return random.choice(stores_in_city)

In [34]:
# Quick check: draw a random city, then a random store location inside it,
# and print both.
city = random.choice(CITIES)
coords = get_store_coords(city)
print(f'city: {city} - location: {coords}')

city: Monterrey - location: (25.744479, -100.409122)


In [35]:
# Get the inventory
# Reads the product catalogue from an Excel workbook addressed relative to the
# current working directory. simulate_purchases later reads the PRODUCT_NAME,
# PRICING, COMISION, BRAND and CATEGORY columns of this frame.
# NOTE(review): the displayed frame contains an 'Unnamed: 0' column, presumably
# an index that was saved into the workbook — confirm whether it should be
# dropped or loaded as the index.

inventory_df = pd.read_excel('./ecomerce_datexland.xlsx')
inventory_df

Unnamed: 0,PRODUCT_NAME,PRICING,COMISION,BRAND,CATEGORY
0,Laptop_Brand1_4GB,1350000,0.2,Brand1,LAPTOPS
1,Laptop_Brand1_8GB,2550000,0.25,Brand1,LAPTOPS
2,Laptop_Brand1_16GB,3500000,0.3,Brand1,LAPTOPS
3,Laptop_Brand1_32GB,4800000,0.35,Brand1,GAMING
4,Laptop_Brand2_4GB,1150000,0.15,Brand2,LAPTOPS
5,Laptop_Brand2_8GB,1850000,0.18,Brand2,LAPTOPS
6,Laptop_Brand2_16GB,3890000,0.2,Brand2,LAPTOPS
7,Laptop_Brand2_32GB,6990000,0.25,Brand2,GAMING
8,Laptop_Brand3_4GB,1850000,0.2,Brand3,LAPTOPS
9,Laptop_Brand3_8GB,3199000,0.28,Brand3,LAPTOPS


#### Purchase simulation

In [36]:

def simulate_purchases(num_purchases:int, df_inventory:pd.DataFrame, delay_choices=(1, 2)):
    """Simulate a stream of purchases drawn at random from an inventory.

    For every purchase a product row is picked uniformly at random, then the
    purchase source, payment details, city and store location are drawn with
    the notebook helpers (get_payment_method, get_store_coords).

    Args:
        num_purchases: Number of purchases to generate. Zero or a negative
            number yields an empty list.
        df_inventory: Inventory frame with the columns PRODUCT_NAME, PRICING,
            COMISION, BRAND and CATEGORY. Its index may be anything; rows are
            addressed by position.
        delay_choices: Pauses, in seconds, one of which is drawn at random and
            slept after each purchase to imitate a live stream. Pass ``None``
            or an empty sequence to generate the purchases without pausing.

    Returns:
        A list with one dict per purchase. Pricing, commission, latitude and
        longitude are stored as strings; ``created_at`` is formatted as
        ``YYYY-MM-DD HH:MM:SS``.

    Raises:
        KeyError: If `df_inventory` lacks one of the required columns.
        ValueError: If `df_inventory` has no rows.
    """
    if num_purchases <= 0:
        return []

    required_columns = ['PRODUCT_NAME', 'PRICING', 'COMISION', 'BRAND', 'CATEGORY']
    missing_columns = [col for col in required_columns if col not in df_inventory.columns]
    if missing_columns:
        # Fail early with the full picture instead of a bare KeyError mid-loop.
        raise KeyError(
            f'df_inventory is missing required column(s) {missing_columns}; '
            f'available columns: {list(df_inventory.columns)}'
        )

    inventory_size = len(df_inventory)
    if inventory_size == 0:
        raise ValueError('df_inventory is empty: there is no product to purchase')

    data_purchase = []
    for _ in range(num_purchases):
        date = pd.to_datetime('today').strftime('%Y-%m-%d %H:%M:%S')

        # randrange excludes the upper bound (randint would include it and
        # occasionally point one past the last row). Positional .iloc access
        # keeps this valid whatever the frame's index labels are.
        row_position = random.randrange(inventory_size)
        product = df_inventory['PRODUCT_NAME'].iloc[row_position]
        pricing = df_inventory['PRICING'].iloc[row_position]
        commission = df_inventory['COMISION'].iloc[row_position]
        brand = df_inventory['BRAND'].iloc[row_position]
        category = df_inventory['CATEGORY'].iloc[row_position]

        source_purchase = random.choice(SOURCE_PURCHASE)
        payment, status, order_type = get_payment_method(source_purchase)
        city = random.choice(CITIES)
        latitude, longitude = get_store_coords(city)

        data_purchase.append({
            'purchase_id': str(uuid.uuid4()),
            'product_name': product,
            'pricing': str(pricing),
            'commission': str(commission),
            'brand': brand,
            'category': category,
            'source_purchase': source_purchase,
            'payment': payment,
            'status': status,
            'order_type': order_type,
            'city': city,
            'created_at': date,
            'latitude': str(latitude),
            'longitude': str(longitude),
        })

        if delay_choices:
            # Space the events out so consecutive purchases get distinct timestamps.
            time.sleep(random.choice(delay_choices))

    return data_purchase


In [38]:
# Generate 20 simulated purchases from the loaded inventory and display them.
# simulate_purchases pauses after every purchase, so this cell takes a while to finish.
purchases = simulate_purchases(num_purchases=20, df_inventory=inventory_df)
purchases

KeyError: 'PRODUCT_NAME'