In [78]:
import pandas as pd
import json
import random
import time
import hashlib
import os
import uuid

# E-commerce data streaming simulator

#### What to do when there’s no data to work with? Just produce your own!

#### Goal:

Create a computers and video games e-commerce store and simulate its purchases in order to build a DataFrame.

1. Choose 3 shoe brands
2. Choose 3 clothing brands
3. Prices of clothes and shoes for every brand, with their commission
4. The most popular cities in your country
5. Payment type
6. Marketing (social media, Organic, Publicity)
7. Orders
8. Stores with their coords (lat/lng)


#### Creating data


In [79]:
# Cities in which simulated purchases can take place.
CITIES = ['Ciudad de México', 'Guadalajara', 'Queretaro', 'Monterrey', 'Puebla']

# Where a purchase originates; get_payment_method treats 'ORGANIC' as an
# in-store purchase and anything else as an online one.
SOURCE_PURCHASE = ['ONLINE', 'ORGANIC']

# Payment methods offered per channel (cash is only available in stores).
PAYMENT_ONLINE = ['Debit', 'Credit']
PAYMENT_STORE = ['Cash', 'Debit', 'Credit']

# Marketing channel attributed to a purchase.
MARKETING = ['Social media', 'News', 'Organic']

# Possible outcomes of an online payment. 'COMPLETED' fills 4 of the 8 slots,
# so a uniform random.choice over this list yields it half of the time.
STATUS_PURCHASED = [
    'COMPLETED',
    'REJECTED',
    'INSUFFICIENT_FUNDS',
    'FAILED_API',
    'FRAUD',
] + ['COMPLETED'] * 3

# Store locations as (latitude, longitude) pairs, keyed by city name.
STORE_COORDS_BY_CITY = {
    'Ciudad de México': [
        (19.372879, -99.049378),
        (19.428502, -99.162914),
        (19.355778, -99.153214),
        # NOTE(review): identical to the previous entry - confirm whether the
        # duplicate is intentional (it doubles that store's selection odds).
        (19.355778, -99.153214),
    ],
    'Guadalajara': [
        (20.690072, -103.301842),
        (20.670411, -103.354498),
        (20.693754, -103.381888),
    ],
    'Queretaro': [
        (20.606305, -100.412364),
        (20.623607, -100.440612),
        (20.655995, -100.399978),
    ],
    'Monterrey': [
        (25.713272, -100.277447),
        (25.732508, -100.234559),
        (25.715347, -100.344189),
        (25.744479, -100.409122),
    ],
    'Puebla': [
        (18.973534, -98.252895),
        (18.971747, -98.215115),
        (19.016400, -98.183632),
    ],
}


In [80]:
def get_payment_method(source:str):
    """Draw the payment details for a purchase coming from ``source``.

    Returns a ``(payment, status, order_type)`` tuple. An 'ORGANIC' source is
    an in-store purchase: the payment comes from PAYMENT_STORE and the status
    is always 'COMPLETED'. Any other source is handled as an online purchase:
    the payment comes from PAYMENT_ONLINE and the status is drawn from
    STATUS_PURCHASED.
    """
    if source != 'ORGANIC':
        # Online flow: draw the payment first, then its outcome.
        online_payment = random.choice(PAYMENT_ONLINE)
        online_status = random.choice(STATUS_PURCHASED)
        return online_payment, online_status, 'ONLINE'

    # In-store flow: the sale is never rejected.
    return random.choice(PAYMENT_STORE), 'COMPLETED', 'STORE'

In [81]:
# Quick check of the helper: draw a random purchase source and display the
# resulting (payment, status, order_type) tuple as the cell output.
get_payment_method(random.choice(SOURCE_PURCHASE))

('Cash', 'COMPLETED', 'STORE')

In [82]:
def get_store_coords(city:str):
    """Pick one store location at random among those registered for ``city``.

    ``city`` must be a key of STORE_COORDS_BY_CITY; the selected entry of that
    city's list is returned unchanged.
    """
    city_stores = STORE_COORDS_BY_CITY[city]
    return random.choice(city_stores)

In [83]:
# Quick check of the helper: pick a random city, then one of that city's
# store locations, and print both.
city = random.choice(CITIES)
coords = get_store_coords(city)
print(f'city: {city} - location: {coords}')

city: Queretaro - location: (20.655995, -100.399978)


In [84]:
# Get the inventory: read the product catalogue from an Excel workbook whose
# path is relative to the current working directory.
# NOTE(review): the product-name header in this workbook appears to carry a
# trailing space ('PRODUCT_NAME ') - verify before relying on exact labels.

inventory_df = pd.read_excel('./ecomerce_datexland.xlsx')
# Bare last expression so the notebook renders the frame as the cell output.
inventory_df

Unnamed: 0,PRODUCT_NAME,PRICING,COMISION,BRAND,CATEGORY
0,Laptop_Brand1_4GB,1350000,0.2,Brand1,LAPTOPS
1,Laptop_Brand1_8GB,2550000,0.25,Brand1,LAPTOPS
2,Laptop_Brand1_16GB,3500000,0.3,Brand1,LAPTOPS
3,Laptop_Brand1_32GB,4800000,0.35,Brand1,GAMING
4,Laptop_Brand2_4GB,1150000,0.15,Brand2,LAPTOPS
5,Laptop_Brand2_8GB,1850000,0.18,Brand2,LAPTOPS
6,Laptop_Brand2_16GB,3890000,0.2,Brand2,LAPTOPS
7,Laptop_Brand2_32GB,6990000,0.25,Brand2,GAMING
8,Laptop_Brand3_4GB,1850000,0.2,Brand3,LAPTOPS
9,Laptop_Brand3_8GB,3199000,0.28,Brand3,LAPTOPS


## Purchase simulation

In [85]:

def simulate_purchases(num_purchases:int, df_inventory:pd.DataFrame, sleep_choices=(1, 2)):
    """Simulate a stream of purchases drawn at random from the inventory.

    Parameters
    ----------
    num_purchases : int
        Number of purchases to generate. Zero or a negative number yields an
        empty list.
    df_inventory : pd.DataFrame
        Inventory with PRODUCT_NAME, PRICING, COMISION, BRAND and CATEGORY
        columns. Whitespace around the column labels is ignored, so a header
        such as 'PRODUCT_NAME ' (trailing space) is accepted as well. The
        frame itself is not modified.
    sleep_choices : sequence of numbers, optional
        Candidate pauses, in seconds, applied after each purchase; one is
        picked at random every time. Defaults to ``(1, 2)``. Pass an empty
        sequence or ``None`` to generate the purchases without pausing.

    Returns
    -------
    list of pd.DataFrame
        One single-row frame per purchase, indexed by the purchase's position
        in the stream, ready to be combined with ``pd.concat``.

    Raises
    ------
    KeyError
        If the inventory lacks one of the required columns.
    ValueError
        If purchases are requested from an inventory without rows.
    """
    required_columns = ('PRODUCT_NAME', 'PRICING', 'COMISION', 'BRAND', 'CATEGORY')

    # Spreadsheet headers often carry stray whitespace; normalise the labels
    # on a copy so the lookups below do not depend on it.
    inventory = df_inventory.rename(
        columns=lambda label: label.strip() if isinstance(label, str) else label
    )
    missing = [column for column in required_columns if column not in inventory.columns]
    if missing:
        raise KeyError(f'inventory is missing required column(s): {missing}')
    if num_purchases > 0 and inventory.empty:
        raise ValueError('cannot simulate purchases from an empty inventory')

    data_purchase = []
    for x in range(num_purchases):
        date = pd.to_datetime('today').strftime('%Y-%m-%d %H:%M:%S')

        # Select the product by row position: this works whatever the frame's
        # index is and reads every attribute from the very same row.
        row_pos = random.randrange(len(inventory))
        product, pricing, commission, brand, category = (
            inventory[column].iat[row_pos] for column in required_columns
        )

        source_purchase = random.choice(SOURCE_PURCHASE)
        payment, status, order_type = get_payment_method(source_purchase)
        city = random.choice(CITIES)
        latitude, longitude = get_store_coords(city)
        marketing = random.choice(MARKETING)

        # Numeric fields are serialised as strings, as in the original records.
        purchase = {
            'purchase_id':str(uuid.uuid4()),
            'product_name':product,
            'pricing':str(pricing),
            'commission':str(commission),
            'brand':brand,
            'category':category,
            'marketing':marketing,
            'source_purchase':source_purchase,
            'payment':payment,
            'status':status,
            'order_type':order_type,
            'city':city,
            'created_at':date,
            'latitude':str(latitude),
            'longitude':str(longitude),
        }

        data_purchase.append(pd.DataFrame(purchase, index=[x]))

        # Pause so consecutive purchases get distinct timestamps, mimicking
        # a live stream of events.
        if sleep_choices:
            time.sleep(random.choice(sleep_choices))

    return data_purchase


In [86]:
# Generate 20 simulated purchases from the loaded inventory. The simulator
# sleeps 1-2 seconds after each purchase, so this cell takes a while to run.
purchases = simulate_purchases(num_purchases=20, df_inventory=inventory_df)

In [87]:
# simulate_purchases returns one single-row DataFrame per purchase; stack them
# into a single frame with a fresh 0..n-1 index.
purchases_df = pd.concat(purchases, ignore_index=True)
# Preview the first 10 purchases as the cell output.
purchases_df.head(10)

Unnamed: 0,purchase_id,product_name,pricing,commission,brand,category,marketing,source_purchase,payment,status,order_type,city,created_at,latitude,longitude
0,0b870fa3-83a9-4b5b-b821-540666fe0d48,Console_Switch,2249000,0.2,Nintendo,VIDEO GAMES,Social media,ONLINE,Debit,COMPLETED,ONLINE,Puebla,2024-01-17 12:47:58,19.0164,-98.183632
1,dd040316-fcaa-437e-9fa7-dc439b097f95,Laptop_Brand2_16GB,3890000,0.2,Brand2,LAPTOPS,News,ORGANIC,Cash,COMPLETED,STORE,Guadalajara,2024-01-17 12:47:59,20.690072,-103.301842
2,2b44ef37-2d69-4fba-a449-60199d725339,Console_PS5,3560000,0.3,PS5,VIDEO GAMES,News,ORGANIC,Cash,COMPLETED,STORE,Queretaro,2024-01-17 12:48:00,20.623607,-100.440612
3,615caf2e-b0f3-42fb-ad10-7eafc4206bdd,Laptop_Brand3_32GB,18999000,0.4,Brand3,GAMING,Social media,ORGANIC,Cash,COMPLETED,STORE,Puebla,2024-01-17 12:48:01,18.973534,-98.252895
4,2c723621-e976-4c4d-b61e-f36d186b217e,Laptop_Brand2_16GB,3890000,0.2,Brand2,LAPTOPS,Organic,ORGANIC,Debit,COMPLETED,STORE,Monterrey,2024-01-17 12:48:03,25.732508,-100.234559
5,bdbd2e7a-e661-43a9-a665-6375d2d2b72d,Console_Switch,2249000,0.2,Nintendo,VIDEO GAMES,News,ONLINE,Credit,REJECTED,ONLINE,Guadalajara,2024-01-17 12:48:04,20.693754,-103.381888
6,1411f5b8-7f26-40a0-9861-82ce9ba502d9,Laptop_Brand2_32GB,6990000,0.25,Brand2,GAMING,News,ONLINE,Credit,COMPLETED,ONLINE,Puebla,2024-01-17 12:48:06,18.971747,-98.215115
7,a76d33d5-e93b-4212-bb2e-6d41c0f9f727,Laptop_Brand2_8GB,1850000,0.18,Brand2,LAPTOPS,Organic,ORGANIC,Cash,COMPLETED,STORE,Ciudad de México,2024-01-17 12:48:07,19.355778,-99.153214
8,0c1d7a34-6021-4f4e-a9c6-2f2b3bbed22e,Laptop_Brand1_4GB,1350000,0.2,Brand1,LAPTOPS,Social media,ONLINE,Debit,INSUFFICIENT_FUNDS,ONLINE,Ciudad de México,2024-01-17 12:48:09,19.372879,-99.049378
9,e225e29e-0ba8-45b1-8a7a-33c33bc872d7,Laptop_Brand3_4GB,1850000,0.2,Brand3,LAPTOPS,Organic,ONLINE,Credit,COMPLETED,ONLINE,Guadalajara,2024-01-17 12:48:10,20.693754,-103.381888


#### What’s the payment status for every purchase?

In [88]:
# Number of purchases per payment status, most frequent first.
purchases_df['status'].value_counts()

status
COMPLETED             17
REJECTED               1
INSUFFICIENT_FUNDS     1
FRAUD                  1
Name: count, dtype: int64

#### What type of publicity is the most effective?

In [89]:
# Number of purchases per marketing channel, most frequent first. The channel
# is assigned with random.choice in the simulation, so differences between
# channels here are sampling noise rather than a real effect.
purchases_df['marketing'].value_counts()

marketing
News            9
Social media    6
Organic         5
Name: count, dtype: int64

#### What product is the best seller?

In [90]:
# Number of purchases per product, most frequent first.
purchases_df['product_name'].value_counts()

product_name
Console_Switch        4
Laptop_Brand2_16GB    4
Laptop_Brand1_8GB     4
Console_PS5           1
Laptop_Brand3_32GB    1
Laptop_Brand2_32GB    1
Laptop_Brand2_8GB     1
Laptop_Brand1_4GB     1
Laptop_Brand3_4GB     1
Laptop_Brand3_16GB    1
Laptop_Brand3_8GB     1
Name: count, dtype: int64