In [1]:
import pandas as pd
import numpy as np
import os
import re

# Set working directory
if not "/data/tables" in os.getcwd():
    os.chdir("../data/tables")

from pyspark.sql import SparkSession
from pyspark.shell import spark
from pyspark.sql import SQLContext
from pyspark.sql.functions import *
import matplotlib.pyplot as plt

spark = (
    SparkSession.builder.appName("MAST30034 Project 2")
    .config("spark.sql.repl.eagerEval.enabled", True) 
    .config("spark.sql.parquet.cacheMetadata", "true")
    .config("spark.sql.session.timeZone", "Etc/UTC")
    .config("spark.driver.memory", "4g")
    .getOrCreate()
)

Welcome to
      ____              __
     / __/__  ___ _____/ /__
    _\ \/ _ \/ _ `/ __/  '_/
   /__ / .__/\_,_/_/ /_/\_\   version 3.3.0
      /_/

Using Python version 3.9.12 (main, Apr  4 2022 05:22:27)
Spark context Web UI available at http://192.168.1.6:4041
Spark context available as 'sc' (master = local[*], app id = local-1662893482505).
SparkSession available as 'spark'.


In [2]:
# Read csv file
consumer = spark.read.option("delimiter", "|").csv('tbl_consumer.csv', header = True)
consumer_postcode = consumer.drop("name","address")
consumer_postcode

state,postcode,gender,consumer_id
WA,6935,Female,1195503
NSW,2782,Female,179208
NT,862,Female,1194530
NSW,2780,Female,154128
WA,6355,Female,712975
NSW,2033,Female,407340
QLD,4606,Female,511685
WA,6056,Male,448088
NSW,2482,Female,650435
VIC,3220,Female,1058499


In [3]:
consumer_postcode = consumer_postcode.toPandas()
consumer_postcode

Unnamed: 0,state,postcode,gender,consumer_id
0,WA,6935,Female,1195503
1,NSW,2782,Female,179208
2,NT,862,Female,1194530
3,NSW,2780,Female,154128
4,WA,6355,Female,712975
...,...,...,...,...
499994,QLD,4400,Female,1385608
499995,VIC,3097,Undisclosed,1466964
499996,NSW,2756,Undisclosed,1253484
499997,VIC,3989,Female,175005


In [4]:
consumer_postcode["postcode"] = consumer_postcode["postcode"].apply(lambda x : int(x))

In [5]:
import pandas as pd
import io
import requests
url="https://www.matthewproctor.com/Content/postcodes/australian_postcodes.csv"
s=requests.get(url).content
df=pd.read_csv(io.StringIO(s.decode('utf-8')))
sa2_code = df[['postcode','SA2_MAINCODE_2016']]
sa2_code.rename(columns = {'SA2_MAINCODE_2016':'SA2_code'},inplace=True)
sa2_code = sa2_code.drop_duplicates()

sa2_code

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sa2_code.rename(columns = {'SA2_MAINCODE_2016':'SA2_code'},inplace=True)


Unnamed: 0,postcode,SA2_code
0,200,801051049.0
2,800,701011002.0
4,801,701011002.0
5,804,701011007.0
6,810,701021010.0
...,...,...
18437,9013,305011105.0
18438,9015,305011105.0
18439,9464,302031038.0
18440,9726,309101268.0


In [6]:
income = pd.read_csv("./SA2/AUS/2021Census_G02_AUST_SA2.csv")
income = income[['SA2_CODE_2021','Median_age_persons','Median_tot_prsnl_inc_weekly']]
income.rename(columns = {'SA2_CODE_2021':'SA2_code'},inplace=True)
income

Unnamed: 0,SA2_code,Median_age_persons,Median_tot_prsnl_inc_weekly
0,101021007,51,760
1,101021008,38,975
2,101021009,37,996
3,101021010,36,1104
4,101021012,37,1357
...,...,...,...
2467,901021002,40,741
2468,901031003,38,585
2469,901041004,50,736
2470,997979799,0,0


# join

In [7]:
merge1 = pd.merge(consumer_postcode, sa2_code, how='left', on = 'postcode')
merge2 = pd.merge(merge1,income, how = 'left',on='SA2_code')
merge2

Unnamed: 0,state,postcode,gender,consumer_id,SA2_code,Median_age_persons,Median_tot_prsnl_inc_weekly
0,WA,6935,Female,1195503,504031066.0,40.0,749.0
1,NSW,2782,Female,179208,124011455.0,50.0,740.0
2,NT,862,Female,1194530,702021055.0,26.0,416.0
3,NT,862,Female,1194530,702051066.0,28.0,276.0
4,NT,862,Female,1194530,702021056.0,33.0,671.0
...,...,...,...,...,...,...,...
882478,NSW,2756,Undisclosed,1253484,124041466.0,39.0,790.0
882479,NSW,2756,Undisclosed,1253484,115031300.0,40.0,907.0
882480,NSW,2756,Undisclosed,1253484,115041301.0,39.0,920.0
882481,VIC,3989,Female,175005,,,


In [8]:
!pip install pandasql



In [9]:
from pandasql import sqldf

query = """
SELECT state, postcode, gender, consumer_id,
avg(Median_age_persons) as mean_age,
avg(Median_tot_prsnl_inc_weekly) as mean_tot_prsnl_inc_weekly

from merge2
group by state, postcode, gender, consumer_id
order by consumer_id

"""


df1 = sqldf(query)

df1

Unnamed: 0,state,postcode,gender,consumer_id,mean_age,mean_tot_prsnl_inc_weekly
0,QLD,4426,Female,10,42.0,770.0
1,NSW,1040,Female,100,,
2,VIC,3903,Male,1000002,55.0,562.0
3,VIC,3328,Female,1000003,40.0,780.0
4,WA,6513,Male,1000006,45.0,830.0
...,...,...,...,...,...,...
499994,NSW,2402,Female,999993,50.0,533.0
499995,VIC,3231,Male,999994,55.0,881.0
499996,NSW,2535,Female,999995,52.0,783.0
499997,WA,6027,Undisclosed,999997,40.4,889.6


In [11]:
path = "transactions_20210228_20210827_snapshot/"
list_files = os.listdir(path)
list_files = list_files[1:(len(list_files)-1)]

# import modules
from pyspark.sql import SparkSession
import functools
 
# explicit function
def unionAll(dfs):
    return functools.reduce(lambda df1, df2: df1.union(df2.select(df1.columns)), dfs)

# read files
file_name = os.listdir(path+ list_files[0])[1]
transaction = spark.read.parquet(path+ list_files[0] +"/" + file_name)
transaction = transaction.withColumn('order_datetime',lit(list_files[0][15:]))
for i in list_files[1:]:
    file_name = os.listdir(path + i)[1]
    tmp = spark.read.parquet(path+ i +"/" + file_name)
    tmp = tmp.withColumn('order_datetime',lit(i[15:]))
    transaction = unionAll([transaction, tmp] )

transaction

user_id,merchant_abn,dollar_value,order_id,order_datetime
1,28000487688,133.22689421562643,0c37b3f7-c7f1-48c...,2021-02-28
18485,62191208634,79.13140006851712,9e18b913-0465-4fd...,2021-02-28
1,83690644458,30.441348317517228,40a2ff69-ea34-465...,2021-02-28
18488,39649557865,962.8133405407584,f4c1a5ae-5b76-40d...,2021-02-28
2,80779820715,48.12397733548124,cd09bdd6-f56d-489...,2021-02-28
18489,43186523025,98.14878546968934,9008a98e-1b02-4de...,2021-02-28
3,29566626791,46.33087226118639,26b7574e-81c2-455...,2021-02-28
18490,93558142492,232.83335268750145,2bda0665-796f-4f2...,2021-02-28
3,32361057556,87.34942171371054,633a7656-2fcc-4b8...,2021-02-28
18491,64974914166,130.12601873970038,4bc15338-83eb-43d...,2021-02-28


In [23]:
user_detail = spark.read.parquet("consumer_user_details.parquet")
user_detail

user_id,consumer_id
1,1195503
2,179208
3,1194530
4,154128
5,712975
6,407340
7,511685
8,448088
9,650435
10,1058499


In [24]:
# Join transaction data with customer data
customer_transaction = (transaction.join(user_detail, transaction.user_id == user_detail.user_id)
                                   .drop(user_detail.user_id))

customer_transaction = (customer_transaction.join(consumer, customer_transaction.consumer_id == consumer.consumer_id)
                                            .drop(consumer.consumer_id)
                                            .select(transaction['*'], consumer.postcode, consumer.state, consumer.gender))
#make 'month' column extracting from 'order_datetime'
customer_transaction = customer_transaction.select(col("*"),col("order_datetime"),
          month(col("order_datetime")).alias('month')
                           )
customer_transaction

user_id,merchant_abn,dollar_value,order_id,order_datetime,postcode,state,gender,order_datetime.1,month
3698,55778594682,21.941266654463465,8a806e0d-558d-468...,2021-02-28,2299,NSW,Male,2021-02-28,2
3698,10648956813,99.30549322421652,e493f287-efe0-425...,2021-03-03,2299,NSW,Male,2021-03-03,3
3698,75089928159,3.363306277086005,94b7fb1a-82d1-422...,2021-03-06,2299,NSW,Male,2021-03-06,3
3698,42543374304,351.2979463642349,451bdc08-cc6e-41b...,2021-03-08,2299,NSW,Male,2021-03-08,3
3698,54611298155,2034.6296050908,dccaacc6-a0fd-44c...,2021-03-10,2299,NSW,Male,2021-03-10,3
3698,24852446429,22.143273437576056,c6b2cfa3-2494-4fa...,2021-03-12,2299,NSW,Male,2021-03-12,3
3698,63123845164,384.9929053083648,ee723c3a-ad57-4b3...,2021-03-12,2299,NSW,Male,2021-03-12,3
3698,63290521567,42.178347325192256,7d5afc38-5be8-4c6...,2021-03-12,2299,NSW,Male,2021-03-12,3
3698,42355028515,76.8823425915479,274dfcce-a369-46c...,2021-03-14,2299,NSW,Male,2021-03-14,3
3698,65674339048,80.50821804740839,dd90c0e0-e343-40c...,2021-03-16,2299,NSW,Male,2021-03-16,3


In [32]:
# 상점 별로, user id당 몇명씩 방문했는지
customer_transaction.createOrReplaceTempView("customer_transaction")


customer_transaction_count = spark.sql(
    """
    Select  user_id, merchant_abn,
    count(user_id) as count_user_id 

    from customer_transaction
    group by user_id, merchant_abn 
    order by  count_user_id desc
    """)
customer_transaction_count

user_id,merchant_abn,count_user_id
18011,24852446429,12
15077,24852446429,12
5024,43186523025,12
9928,64203420245,11
11998,86578477987,11
8633,86578477987,11
13856,24852446429,11
21272,86578477987,11
21223,49891706470,11
1640,64203420245,11


In [34]:
customer_transaction_count.createOrReplaceTempView("customer_transaction_count")


customer_transaction_count_mean = spark.sql(
    """
    Select  user_id, merchant_abn,count_user_id,
    avg(count_user_id) as mean_visited_user 

    from customer_transaction_count
    group by merchant_abn 
    order by  mean_visited_user desc
    """)
customer_transaction_count_mean

AnalysisException: expression 'customer_transaction_count.user_id' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get.;
Sort [mean_visited_user#8397 DESC NULLS LAST], true
+- Aggregate [merchant_abn#81L], [user_id#80L, merchant_abn#81L, count_user_id#8313L, avg(count_user_id#8313L) AS mean_visited_user#8397]
   +- SubqueryAlias customer_transaction_count
      +- View (`customer_transaction_count`, [user_id#80L,merchant_abn#81L,count_user_id#8313L])
         +- Sort [count_user_id#8313L DESC NULLS LAST], true
            +- Aggregate [user_id#80L, merchant_abn#81L], [user_id#80L, merchant_abn#81L, count(user_id#80L) AS count_user_id#8313L]
               +- SubqueryAlias customer_transaction
                  +- View (`customer_transaction`, [user_id#80L,merchant_abn#81L,dollar_value#82,order_id#83,order_datetime#88,postcode#20,state#19,gender#21,order_datetime#88,month#6959])
                     +- Project [user_id#80L, merchant_abn#81L, dollar_value#82, order_id#83, order_datetime#88, postcode#20, state#19, gender#21, order_datetime#88, month(cast(order_datetime#88 as date)) AS month#6959]
                        +- Project [user_id#80L, merchant_abn#81L, dollar_value#82, order_id#83, order_datetime#88, postcode#20, state#19, gender#21]
                           +- Project [user_id#80L, merchant_abn#81L, dollar_value#82, order_id#83, order_datetime#88, consumer_id#6874L, name#17, address#18, state#19, postcode#20, gender#21]
                              +- Join Inner, (consumer_id#6874L = cast(consumer_id#22 as bigint))
                                 :- Project [user_id#80L, merchant_abn#81L, dollar_value#82, order_id#83, order_datetime#88, consumer_id#6874L]
                                 :  +- Join Inner, (user_id#80L = user_id#6873L)
                                 :     :- Union false, false
                                 :     :  :- Project [user_id#80L, merchant_abn#81L, dollar_value#82, order_id#83, 2021-02-28 AS order_datetime#88]
                                 :     :  :  +- Relation [user_id#80L,merchant_abn#81L,dollar_value#82,order_id#83] parquet
                                 :     :  :- Project [user_id#95L, merchant_abn#96L, dollar_value#97, order_id#98, 2021-03-01 AS order_datetime#103]
                                 :     :  :  +- Relation [user_id#95L,merchant_abn#96L,dollar_value#97,order_id#98] parquet
                                 :     :  :- Project [user_id#120L, merchant_abn#121L, dollar_value#122, order_id#123, 2021-03-02 AS order_datetime#128]
                                 :     :  :  +- Relation [user_id#120L,merchant_abn#121L,dollar_value#122,order_id#123] parquet
                                 :     :  :- Project [user_id#145L, merchant_abn#146L, dollar_value#147, order_id#148, 2021-03-03 AS order_datetime#153]
                                 :     :  :  +- Relation [user_id#145L,merchant_abn#146L,dollar_value#147,order_id#148] parquet
                                 :     :  :- Project [user_id#170L, merchant_abn#171L, dollar_value#172, order_id#173, 2021-03-04 AS order_datetime#178]
                                 :     :  :  +- Relation [user_id#170L,merchant_abn#171L,dollar_value#172,order_id#173] parquet
                                 :     :  :- Project [user_id#195L, merchant_abn#196L, dollar_value#197, order_id#198, 2021-03-05 AS order_datetime#203]
                                 :     :  :  +- Relation [user_id#195L,merchant_abn#196L,dollar_value#197,order_id#198] parquet
                                 :     :  :- Project [user_id#220L, merchant_abn#221L, dollar_value#222, order_id#223, 2021-03-06 AS order_datetime#228]
                                 :     :  :  +- Relation [user_id#220L,merchant_abn#221L,dollar_value#222,order_id#223] parquet
                                 :     :  :- Project [user_id#245L, merchant_abn#246L, dollar_value#247, order_id#248, 2021-03-07 AS order_datetime#253]
                                 :     :  :  +- Relation [user_id#245L,merchant_abn#246L,dollar_value#247,order_id#248] parquet
                                 :     :  :- Project [user_id#270L, merchant_abn#271L, dollar_value#272, order_id#273, 2021-03-08 AS order_datetime#278]
                                 :     :  :  +- Relation [user_id#270L,merchant_abn#271L,dollar_value#272,order_id#273] parquet
                                 :     :  :- Project [user_id#295L, merchant_abn#296L, dollar_value#297, order_id#298, 2021-03-09 AS order_datetime#303]
                                 :     :  :  +- Relation [user_id#295L,merchant_abn#296L,dollar_value#297,order_id#298] parquet
                                 :     :  :- Project [user_id#320L, merchant_abn#321L, dollar_value#322, order_id#323, 2021-03-10 AS order_datetime#328]
                                 :     :  :  +- Relation [user_id#320L,merchant_abn#321L,dollar_value#322,order_id#323] parquet
                                 :     :  :- Project [user_id#345L, merchant_abn#346L, dollar_value#347, order_id#348, 2021-03-11 AS order_datetime#353]
                                 :     :  :  +- Relation [user_id#345L,merchant_abn#346L,dollar_value#347,order_id#348] parquet
                                 :     :  :- Project [user_id#370L, merchant_abn#371L, dollar_value#372, order_id#373, 2021-03-12 AS order_datetime#378]
                                 :     :  :  +- Relation [user_id#370L,merchant_abn#371L,dollar_value#372,order_id#373] parquet
                                 :     :  :- Project [user_id#395L, merchant_abn#396L, dollar_value#397, order_id#398, 2021-03-13 AS order_datetime#403]
                                 :     :  :  +- Relation [user_id#395L,merchant_abn#396L,dollar_value#397,order_id#398] parquet
                                 :     :  :- Project [user_id#420L, merchant_abn#421L, dollar_value#422, order_id#423, 2021-03-14 AS order_datetime#428]
                                 :     :  :  +- Relation [user_id#420L,merchant_abn#421L,dollar_value#422,order_id#423] parquet
                                 :     :  :- Project [user_id#445L, merchant_abn#446L, dollar_value#447, order_id#448, 2021-03-15 AS order_datetime#453]
                                 :     :  :  +- Relation [user_id#445L,merchant_abn#446L,dollar_value#447,order_id#448] parquet
                                 :     :  :- Project [user_id#470L, merchant_abn#471L, dollar_value#472, order_id#473, 2021-03-16 AS order_datetime#478]
                                 :     :  :  +- Relation [user_id#470L,merchant_abn#471L,dollar_value#472,order_id#473] parquet
                                 :     :  :- Project [user_id#495L, merchant_abn#496L, dollar_value#497, order_id#498, 2021-03-17 AS order_datetime#503]
                                 :     :  :  +- Relation [user_id#495L,merchant_abn#496L,dollar_value#497,order_id#498] parquet
                                 :     :  :- Project [user_id#520L, merchant_abn#521L, dollar_value#522, order_id#523, 2021-03-18 AS order_datetime#528]
                                 :     :  :  +- Relation [user_id#520L,merchant_abn#521L,dollar_value#522,order_id#523] parquet
                                 :     :  :- Project [user_id#545L, merchant_abn#546L, dollar_value#547, order_id#548, 2021-03-19 AS order_datetime#553]
                                 :     :  :  +- Relation [user_id#545L,merchant_abn#546L,dollar_value#547,order_id#548] parquet
                                 :     :  :- Project [user_id#570L, merchant_abn#571L, dollar_value#572, order_id#573, 2021-03-20 AS order_datetime#578]
                                 :     :  :  +- Relation [user_id#570L,merchant_abn#571L,dollar_value#572,order_id#573] parquet
                                 :     :  :- Project [user_id#595L, merchant_abn#596L, dollar_value#597, order_id#598, 2021-03-21 AS order_datetime#603]
                                 :     :  :  +- Relation [user_id#595L,merchant_abn#596L,dollar_value#597,order_id#598] parquet
                                 :     :  :- Project [user_id#620L, merchant_abn#621L, dollar_value#622, order_id#623, 2021-03-22 AS order_datetime#628]
                                 :     :  :  +- Relation [user_id#620L,merchant_abn#621L,dollar_value#622,order_id#623] parquet
                                 :     :  :- Project [user_id#645L, merchant_abn#646L, dollar_value#647, order_id#648, 2021-03-23 AS order_datetime#653]
                                 :     :  :  +- Relation [user_id#645L,merchant_abn#646L,dollar_value#647,order_id#648] parquet
                                 :     :  :- Project [user_id#670L, merchant_abn#671L, dollar_value#672, order_id#673, 2021-03-24 AS order_datetime#678]
                                 :     :  :  +- Relation [user_id#670L,merchant_abn#671L,dollar_value#672,order_id#673] parquet
                                 :     :  :- Project [user_id#695L, merchant_abn#696L, dollar_value#697, order_id#698, 2021-03-25 AS order_datetime#703]
                                 :     :  :  +- Relation [user_id#695L,merchant_abn#696L,dollar_value#697,order_id#698] parquet
                                 :     :  :- Project [user_id#720L, merchant_abn#721L, dollar_value#722, order_id#723, 2021-03-26 AS order_datetime#728]
                                 :     :  :  +- Relation [user_id#720L,merchant_abn#721L,dollar_value#722,order_id#723] parquet
                                 :     :  :- Project [user_id#745L, merchant_abn#746L, dollar_value#747, order_id#748, 2021-03-27 AS order_datetime#753]
                                 :     :  :  +- Relation [user_id#745L,merchant_abn#746L,dollar_value#747,order_id#748] parquet
                                 :     :  :- Project [user_id#770L, merchant_abn#771L, dollar_value#772, order_id#773, 2021-03-28 AS order_datetime#778]
                                 :     :  :  +- Relation [user_id#770L,merchant_abn#771L,dollar_value#772,order_id#773] parquet
                                 :     :  :- Project [user_id#795L, merchant_abn#796L, dollar_value#797, order_id#798, 2021-03-29 AS order_datetime#803]
                                 :     :  :  +- Relation [user_id#795L,merchant_abn#796L,dollar_value#797,order_id#798] parquet
                                 :     :  :- Project [user_id#820L, merchant_abn#821L, dollar_value#822, order_id#823, 2021-03-30 AS order_datetime#828]
                                 :     :  :  +- Relation [user_id#820L,merchant_abn#821L,dollar_value#822,order_id#823] parquet
                                 :     :  :- Project [user_id#845L, merchant_abn#846L, dollar_value#847, order_id#848, 2021-03-31 AS order_datetime#853]
                                 :     :  :  +- Relation [user_id#845L,merchant_abn#846L,dollar_value#847,order_id#848] parquet
                                 :     :  :- Project [user_id#870L, merchant_abn#871L, dollar_value#872, order_id#873, 2021-04-01 AS order_datetime#878]
                                 :     :  :  +- Relation [user_id#870L,merchant_abn#871L,dollar_value#872,order_id#873] parquet
                                 :     :  :- Project [user_id#895L, merchant_abn#896L, dollar_value#897, order_id#898, 2021-04-02 AS order_datetime#903]
                                 :     :  :  +- Relation [user_id#895L,merchant_abn#896L,dollar_value#897,order_id#898] parquet
                                 :     :  :- Project [user_id#920L, merchant_abn#921L, dollar_value#922, order_id#923, 2021-04-03 AS order_datetime#928]
                                 :     :  :  +- Relation [user_id#920L,merchant_abn#921L,dollar_value#922,order_id#923] parquet
                                 :     :  :- Project [user_id#945L, merchant_abn#946L, dollar_value#947, order_id#948, 2021-04-04 AS order_datetime#953]
                                 :     :  :  +- Relation [user_id#945L,merchant_abn#946L,dollar_value#947,order_id#948] parquet
                                 :     :  :- Project [user_id#970L, merchant_abn#971L, dollar_value#972, order_id#973, 2021-04-05 AS order_datetime#978]
                                 :     :  :  +- Relation [user_id#970L,merchant_abn#971L,dollar_value#972,order_id#973] parquet
                                 :     :  :- Project [user_id#995L, merchant_abn#996L, dollar_value#997, order_id#998, 2021-04-06 AS order_datetime#1003]
                                 :     :  :  +- Relation [user_id#995L,merchant_abn#996L,dollar_value#997,order_id#998] parquet
                                 :     :  :- Project [user_id#1020L, merchant_abn#1021L, dollar_value#1022, order_id#1023, 2021-04-07 AS order_datetime#1028]
                                 :     :  :  +- Relation [user_id#1020L,merchant_abn#1021L,dollar_value#1022,order_id#1023] parquet
                                 :     :  :- Project [user_id#1045L, merchant_abn#1046L, dollar_value#1047, order_id#1048, 2021-04-08 AS order_datetime#1053]
                                 :     :  :  +- Relation [user_id#1045L,merchant_abn#1046L,dollar_value#1047,order_id#1048] parquet
                                 :     :  :- Project [user_id#1070L, merchant_abn#1071L, dollar_value#1072, order_id#1073, 2021-04-09 AS order_datetime#1078]
                                 :     :  :  +- Relation [user_id#1070L,merchant_abn#1071L,dollar_value#1072,order_id#1073] parquet
                                 :     :  :- Project [user_id#1095L, merchant_abn#1096L, dollar_value#1097, order_id#1098, 2021-04-10 AS order_datetime#1103]
                                 :     :  :  +- Relation [user_id#1095L,merchant_abn#1096L,dollar_value#1097,order_id#1098] parquet
                                 :     :  :- Project [user_id#1120L, merchant_abn#1121L, dollar_value#1122, order_id#1123, 2021-04-11 AS order_datetime#1128]
                                 :     :  :  +- Relation [user_id#1120L,merchant_abn#1121L,dollar_value#1122,order_id#1123] parquet
                                 :     :  :- Project [user_id#1145L, merchant_abn#1146L, dollar_value#1147, order_id#1148, 2021-04-12 AS order_datetime#1153]
                                 :     :  :  +- Relation [user_id#1145L,merchant_abn#1146L,dollar_value#1147,order_id#1148] parquet
                                 :     :  :- Project [user_id#1170L, merchant_abn#1171L, dollar_value#1172, order_id#1173, 2021-04-13 AS order_datetime#1178]
                                 :     :  :  +- Relation [user_id#1170L,merchant_abn#1171L,dollar_value#1172,order_id#1173] parquet
                                 :     :  :- Project [user_id#1195L, merchant_abn#1196L, dollar_value#1197, order_id#1198, 2021-04-14 AS order_datetime#1203]
                                 :     :  :  +- Relation [user_id#1195L,merchant_abn#1196L,dollar_value#1197,order_id#1198] parquet
                                 :     :  :- Project [user_id#1220L, merchant_abn#1221L, dollar_value#1222, order_id#1223, 2021-04-15 AS order_datetime#1228]
                                 :     :  :  +- Relation [user_id#1220L,merchant_abn#1221L,dollar_value#1222,order_id#1223] parquet
                                 :     :  :- Project [user_id#1245L, merchant_abn#1246L, dollar_value#1247, order_id#1248, 2021-04-16 AS order_datetime#1253]
                                 :     :  :  +- Relation [user_id#1245L,merchant_abn#1246L,dollar_value#1247,order_id#1248] parquet
                                 :     :  :- Project [user_id#1270L, merchant_abn#1271L, dollar_value#1272, order_id#1273, 2021-04-17 AS order_datetime#1278]
                                 :     :  :  +- Relation [user_id#1270L,merchant_abn#1271L,dollar_value#1272,order_id#1273] parquet
                                 :     :  :- Project [user_id#1295L, merchant_abn#1296L, dollar_value#1297, order_id#1298, 2021-04-18 AS order_datetime#1303]
                                 :     :  :  +- Relation [user_id#1295L,merchant_abn#1296L,dollar_value#1297,order_id#1298] parquet
                                 :     :  :- Project [user_id#1320L, merchant_abn#1321L, dollar_value#1322, order_id#1323, 2021-04-19 AS order_datetime#1328]
                                 :     :  :  +- Relation [user_id#1320L,merchant_abn#1321L,dollar_value#1322,order_id#1323] parquet
                                 :     :  :- Project [user_id#1345L, merchant_abn#1346L, dollar_value#1347, order_id#1348, 2021-04-20 AS order_datetime#1353]
                                 :     :  :  +- Relation [user_id#1345L,merchant_abn#1346L,dollar_value#1347,order_id#1348] parquet
                                 :     :  :- Project [user_id#1370L, merchant_abn#1371L, dollar_value#1372, order_id#1373, 2021-04-21 AS order_datetime#1378]
                                 :     :  :  +- Relation [user_id#1370L,merchant_abn#1371L,dollar_value#1372,order_id#1373] parquet
                                 :     :  :- Project [user_id#1395L, merchant_abn#1396L, dollar_value#1397, order_id#1398, 2021-04-22 AS order_datetime#1403]
                                 :     :  :  +- Relation [user_id#1395L,merchant_abn#1396L,dollar_value#1397,order_id#1398] parquet
                                 :     :  :- Project [user_id#1420L, merchant_abn#1421L, dollar_value#1422, order_id#1423, 2021-04-23 AS order_datetime#1428]
                                 :     :  :  +- Relation [user_id#1420L,merchant_abn#1421L,dollar_value#1422,order_id#1423] parquet
                                 :     :  :- Project [user_id#1445L, merchant_abn#1446L, dollar_value#1447, order_id#1448, 2021-04-24 AS order_datetime#1453]
                                 :     :  :  +- Relation [user_id#1445L,merchant_abn#1446L,dollar_value#1447,order_id#1448] parquet
                                 :     :  :- Project [user_id#1470L, merchant_abn#1471L, dollar_value#1472, order_id#1473, 2021-04-25 AS order_datetime#1478]
                                 :     :  :  +- Relation [user_id#1470L,merchant_abn#1471L,dollar_value#1472,order_id#1473] parquet
                                 :     :  :- Project [user_id#1495L, merchant_abn#1496L, dollar_value#1497, order_id#1498, 2021-04-26 AS order_datetime#1503]
                                 :     :  :  +- Relation [user_id#1495L,merchant_abn#1496L,dollar_value#1497,order_id#1498] parquet
                                 :     :  :- Project [user_id#1520L, merchant_abn#1521L, dollar_value#1522, order_id#1523, 2021-04-27 AS order_datetime#1528]
                                 :     :  :  +- Relation [user_id#1520L,merchant_abn#1521L,dollar_value#1522,order_id#1523] parquet
                                 :     :  :- Project [user_id#1545L, merchant_abn#1546L, dollar_value#1547, order_id#1548, 2021-04-28 AS order_datetime#1553]
                                 :     :  :  +- Relation [user_id#1545L,merchant_abn#1546L,dollar_value#1547,order_id#1548] parquet
                                 :     :  :- Project [user_id#1570L, merchant_abn#1571L, dollar_value#1572, order_id#1573, 2021-04-29 AS order_datetime#1578]
                                 :     :  :  +- Relation [user_id#1570L,merchant_abn#1571L,dollar_value#1572,order_id#1573] parquet
                                 :     :  :- Project [user_id#1595L, merchant_abn#1596L, dollar_value#1597, order_id#1598, 2021-04-30 AS order_datetime#1603]
                                 :     :  :  +- Relation [user_id#1595L,merchant_abn#1596L,dollar_value#1597,order_id#1598] parquet
                                 :     :  :- Project [user_id#1620L, merchant_abn#1621L, dollar_value#1622, order_id#1623, 2021-05-01 AS order_datetime#1628]
                                 :     :  :  +- Relation [user_id#1620L,merchant_abn#1621L,dollar_value#1622,order_id#1623] parquet
                                 :     :  :- Project [user_id#1645L, merchant_abn#1646L, dollar_value#1647, order_id#1648, 2021-05-02 AS order_datetime#1653]
                                 :     :  :  +- Relation [user_id#1645L,merchant_abn#1646L,dollar_value#1647,order_id#1648] parquet
                                 :     :  :- Project [user_id#1670L, merchant_abn#1671L, dollar_value#1672, order_id#1673, 2021-05-03 AS order_datetime#1678]
                                 :     :  :  +- Relation [user_id#1670L,merchant_abn#1671L,dollar_value#1672,order_id#1673] parquet
                                 :     :  :- Project [user_id#1695L, merchant_abn#1696L, dollar_value#1697, order_id#1698, 2021-05-04 AS order_datetime#1703]
                                 :     :  :  +- Relation [user_id#1695L,merchant_abn#1696L,dollar_value#1697,order_id#1698] parquet
                                 :     :  :- Project [user_id#1720L, merchant_abn#1721L, dollar_value#1722, order_id#1723, 2021-05-05 AS order_datetime#1728]
                                 :     :  :  +- Relation [user_id#1720L,merchant_abn#1721L,dollar_value#1722,order_id#1723] parquet
                                 :     :  :- Project [user_id#1745L, merchant_abn#1746L, dollar_value#1747, order_id#1748, 2021-05-06 AS order_datetime#1753]
                                 :     :  :  +- Relation [user_id#1745L,merchant_abn#1746L,dollar_value#1747,order_id#1748] parquet
                                 :     :  :- Project [user_id#1770L, merchant_abn#1771L, dollar_value#1772, order_id#1773, 2021-05-07 AS order_datetime#1778]
                                 :     :  :  +- Relation [user_id#1770L,merchant_abn#1771L,dollar_value#1772,order_id#1773] parquet
                                 :     :  :- Project [user_id#1795L, merchant_abn#1796L, dollar_value#1797, order_id#1798, 2021-05-08 AS order_datetime#1803]
                                 :     :  :  +- Relation [user_id#1795L,merchant_abn#1796L,dollar_value#1797,order_id#1798] parquet
                                 :     :  :- Project [user_id#1820L, merchant_abn#1821L, dollar_value#1822, order_id#1823, 2021-05-09 AS order_datetime#1828]
                                 :     :  :  +- Relation [user_id#1820L,merchant_abn#1821L,dollar_value#1822,order_id#1823] parquet
                                 :     :  :- Project [user_id#1845L, merchant_abn#1846L, dollar_value#1847, order_id#1848, 2021-05-10 AS order_datetime#1853]
                                 :     :  :  +- Relation [user_id#1845L,merchant_abn#1846L,dollar_value#1847,order_id#1848] parquet
                                 :     :  :- Project [user_id#1870L, merchant_abn#1871L, dollar_value#1872, order_id#1873, 2021-05-11 AS order_datetime#1878]
                                 :     :  :  +- Relation [user_id#1870L,merchant_abn#1871L,dollar_value#1872,order_id#1873] parquet
                                 :     :  :- Project [user_id#1895L, merchant_abn#1896L, dollar_value#1897, order_id#1898, 2021-05-12 AS order_datetime#1903]
                                 :     :  :  +- Relation [user_id#1895L,merchant_abn#1896L,dollar_value#1897,order_id#1898] parquet
                                 :     :  :- Project [user_id#1920L, merchant_abn#1921L, dollar_value#1922, order_id#1923, 2021-05-13 AS order_datetime#1928]
                                 :     :  :  +- Relation [user_id#1920L,merchant_abn#1921L,dollar_value#1922,order_id#1923] parquet
                                 :     :  :- Project [user_id#1945L, merchant_abn#1946L, dollar_value#1947, order_id#1948, 2021-05-14 AS order_datetime#1953]
                                 :     :  :  +- Relation [user_id#1945L,merchant_abn#1946L,dollar_value#1947,order_id#1948] parquet
                                 :     :  :- Project [user_id#1970L, merchant_abn#1971L, dollar_value#1972, order_id#1973, 2021-05-15 AS order_datetime#1978]
                                 :     :  :  +- Relation [user_id#1970L,merchant_abn#1971L,dollar_value#1972,order_id#1973] parquet
                                 :     :  :- Project [user_id#1995L, merchant_abn#1996L, dollar_value#1997, order_id#1998, 2021-05-16 AS order_datetime#2003]
                                 :     :  :  +- Relation [user_id#1995L,merchant_abn#1996L,dollar_value#1997,order_id#1998] parquet
                                 :     :  :- Project [user_id#2020L, merchant_abn#2021L, dollar_value#2022, order_id#2023, 2021-05-17 AS order_datetime#2028]
                                 :     :  :  +- Relation [user_id#2020L,merchant_abn#2021L,dollar_value#2022,order_id#2023] parquet
                                 :     :  :- Project [user_id#2045L, merchant_abn#2046L, dollar_value#2047, order_id#2048, 2021-05-18 AS order_datetime#2053]
                                 :     :  :  +- Relation [user_id#2045L,merchant_abn#2046L,dollar_value#2047,order_id#2048] parquet
                                 :     :  :- Project [user_id#2070L, merchant_abn#2071L, dollar_value#2072, order_id#2073, 2021-05-19 AS order_datetime#2078]
                                 :     :  :  +- Relation [user_id#2070L,merchant_abn#2071L,dollar_value#2072,order_id#2073] parquet
                                 :     :  :- Project [user_id#2095L, merchant_abn#2096L, dollar_value#2097, order_id#2098, 2021-05-20 AS order_datetime#2103]
                                 :     :  :  +- Relation [user_id#2095L,merchant_abn#2096L,dollar_value#2097,order_id#2098] parquet
                                 :     :  :- Project [user_id#2120L, merchant_abn#2121L, dollar_value#2122, order_id#2123, 2021-05-21 AS order_datetime#2128]
                                 :     :  :  +- Relation [user_id#2120L,merchant_abn#2121L,dollar_value#2122,order_id#2123] parquet
                                 :     :  :- Project [user_id#2145L, merchant_abn#2146L, dollar_value#2147, order_id#2148, 2021-05-22 AS order_datetime#2153]
                                 :     :  :  +- Relation [user_id#2145L,merchant_abn#2146L,dollar_value#2147,order_id#2148] parquet
                                 :     :  :- Project [user_id#2170L, merchant_abn#2171L, dollar_value#2172, order_id#2173, 2021-05-23 AS order_datetime#2178]
                                 :     :  :  +- Relation [user_id#2170L,merchant_abn#2171L,dollar_value#2172,order_id#2173] parquet
                                 :     :  :- Project [user_id#2195L, merchant_abn#2196L, dollar_value#2197, order_id#2198, 2021-05-24 AS order_datetime#2203]
                                 :     :  :  +- Relation [user_id#2195L,merchant_abn#2196L,dollar_value#2197,order_id#2198] parquet
                                 :     :  :- Project [user_id#2220L, merchant_abn#2221L, dollar_value#2222, order_id#2223, 2021-05-25 AS order_datetime#2228]
                                 :     :  :  +- Relation [user_id#2220L,merchant_abn#2221L,dollar_value#2222,order_id#2223] parquet
                                 :     :  :- Project [user_id#2245L, merchant_abn#2246L, dollar_value#2247, order_id#2248, 2021-05-26 AS order_datetime#2253]
                                 :     :  :  +- Relation [user_id#2245L,merchant_abn#2246L,dollar_value#2247,order_id#2248] parquet
                                 :     :  :- Project [user_id#2270L, merchant_abn#2271L, dollar_value#2272, order_id#2273, 2021-05-27 AS order_datetime#2278]
                                 :     :  :  +- Relation [user_id#2270L,merchant_abn#2271L,dollar_value#2272,order_id#2273] parquet
                                 :     :  :- Project [user_id#2295L, merchant_abn#2296L, dollar_value#2297, order_id#2298, 2021-05-28 AS order_datetime#2303]
                                 :     :  :  +- Relation [user_id#2295L,merchant_abn#2296L,dollar_value#2297,order_id#2298] parquet
                                 :     :  :- Project [user_id#2320L, merchant_abn#2321L, dollar_value#2322, order_id#2323, 2021-05-29 AS order_datetime#2328]
                                 :     :  :  +- Relation [user_id#2320L,merchant_abn#2321L,dollar_value#2322,order_id#2323] parquet
                                 :     :  :- Project [user_id#2345L, merchant_abn#2346L, dollar_value#2347, order_id#2348, 2021-05-30 AS order_datetime#2353]
                                 :     :  :  +- Relation [user_id#2345L,merchant_abn#2346L,dollar_value#2347,order_id#2348] parquet
                                 :     :  :- Project [user_id#2370L, merchant_abn#2371L, dollar_value#2372, order_id#2373, 2021-05-31 AS order_datetime#2378]
                                 :     :  :  +- Relation [user_id#2370L,merchant_abn#2371L,dollar_value#2372,order_id#2373] parquet
                                 :     :  :- Project [user_id#2395L, merchant_abn#2396L, dollar_value#2397, order_id#2398, 2021-06-01 AS order_datetime#2403]
                                 :     :  :  +- Relation [user_id#2395L,merchant_abn#2396L,dollar_value#2397,order_id#2398] parquet
                                 :     :  :- Project [user_id#2420L, merchant_abn#2421L, dollar_value#2422, order_id#2423, 2021-06-02 AS order_datetime#2428]
                                 :     :  :  +- Relation [user_id#2420L,merchant_abn#2421L,dollar_value#2422,order_id#2423] parquet
                                 :     :  :- Project [user_id#2445L, merchant_abn#2446L, dollar_value#2447, order_id#2448, 2021-06-03 AS order_datetime#2453]
                                 :     :  :  +- Relation [user_id#2445L,merchant_abn#2446L,dollar_value#2447,order_id#2448] parquet
                                 :     :  :- Project [user_id#2470L, merchant_abn#2471L, dollar_value#2472, order_id#2473, 2021-06-04 AS order_datetime#2478]
                                 :     :  :  +- Relation [user_id#2470L,merchant_abn#2471L,dollar_value#2472,order_id#2473] parquet
                                 :     :  :- Project [user_id#2495L, merchant_abn#2496L, dollar_value#2497, order_id#2498, 2021-06-05 AS order_datetime#2503]
                                 :     :  :  +- Relation [user_id#2495L,merchant_abn#2496L,dollar_value#2497,order_id#2498] parquet
                                 :     :  :- Project [user_id#2520L, merchant_abn#2521L, dollar_value#2522, order_id#2523, 2021-06-06 AS order_datetime#2528]
                                 :     :  :  +- Relation [user_id#2520L,merchant_abn#2521L,dollar_value#2522,order_id#2523] parquet
                                 :     :  :- Project [user_id#2545L, merchant_abn#2546L, dollar_value#2547, order_id#2548, 2021-06-07 AS order_datetime#2553]
                                 :     :  :  +- Relation [user_id#2545L,merchant_abn#2546L,dollar_value#2547,order_id#2548] parquet
                                 :     :  :- Project [user_id#2570L, merchant_abn#2571L, dollar_value#2572, order_id#2573, 2021-06-08 AS order_datetime#2578]
                                 :     :  :  +- Relation [user_id#2570L,merchant_abn#2571L,dollar_value#2572,order_id#2573] parquet
                                 :     :  :- Project [user_id#2595L, merchant_abn#2596L, dollar_value#2597, order_id#2598, 2021-06-09 AS order_datetime#2603]
                                 :     :  :  +- Relation [user_id#2595L,merchant_abn#2596L,dollar_value#2597,order_id#2598] parquet
                                 :     :  :- Project [user_id#2620L, merchant_abn#2621L, dollar_value#2622, order_id#2623, 2021-06-10 AS order_datetime#2628]
                                 :     :  :  +- Relation [user_id#2620L,merchant_abn#2621L,dollar_value#2622,order_id#2623] parquet
                                 :     :  :- Project [user_id#2645L, merchant_abn#2646L, dollar_value#2647, order_id#2648, 2021-06-11 AS order_datetime#2653]
                                 :     :  :  +- Relation [user_id#2645L,merchant_abn#2646L,dollar_value#2647,order_id#2648] parquet
                                 :     :  :- Project [user_id#2670L, merchant_abn#2671L, dollar_value#2672, order_id#2673, 2021-06-12 AS order_datetime#2678]
                                 :     :  :  +- Relation [user_id#2670L,merchant_abn#2671L,dollar_value#2672,order_id#2673] parquet
                                 :     :  :- Project [user_id#2695L, merchant_abn#2696L, dollar_value#2697, order_id#2698, 2021-06-13 AS order_datetime#2703]
                                 :     :  :  +- Relation [user_id#2695L,merchant_abn#2696L,dollar_value#2697,order_id#2698] parquet
                                 :     :  :- Project [user_id#2720L, merchant_abn#2721L, dollar_value#2722, order_id#2723, 2021-06-14 AS order_datetime#2728]
                                 :     :  :  +- Relation [user_id#2720L,merchant_abn#2721L,dollar_value#2722,order_id#2723] parquet
                                 :     :  :- Project [user_id#2745L, merchant_abn#2746L, dollar_value#2747, order_id#2748, 2021-06-15 AS order_datetime#2753]
                                 :     :  :  +- Relation [user_id#2745L,merchant_abn#2746L,dollar_value#2747,order_id#2748] parquet
                                 :     :  :- Project [user_id#2770L, merchant_abn#2771L, dollar_value#2772, order_id#2773, 2021-06-16 AS order_datetime#2778]
                                 :     :  :  +- Relation [user_id#2770L,merchant_abn#2771L,dollar_value#2772,order_id#2773] parquet
                                 :     :  :- Project [user_id#2795L, merchant_abn#2796L, dollar_value#2797, order_id#2798, 2021-06-17 AS order_datetime#2803]
                                 :     :  :  +- Relation [user_id#2795L,merchant_abn#2796L,dollar_value#2797,order_id#2798] parquet
                                 :     :  :- Project [user_id#2820L, merchant_abn#2821L, dollar_value#2822, order_id#2823, 2021-06-18 AS order_datetime#2828]
                                 :     :  :  +- Relation [user_id#2820L,merchant_abn#2821L,dollar_value#2822,order_id#2823] parquet
                                 :     :  :- Project [user_id#2845L, merchant_abn#2846L, dollar_value#2847, order_id#2848, 2021-06-19 AS order_datetime#2853]
                                 :     :  :  +- Relation [user_id#2845L,merchant_abn#2846L,dollar_value#2847,order_id#2848] parquet
                                 :     :  :- Project [user_id#2870L, merchant_abn#2871L, dollar_value#2872, order_id#2873, 2021-06-20 AS order_datetime#2878]
                                 :     :  :  +- Relation [user_id#2870L,merchant_abn#2871L,dollar_value#2872,order_id#2873] parquet
                                 :     :  :- Project [user_id#2895L, merchant_abn#2896L, dollar_value#2897, order_id#2898, 2021-06-21 AS order_datetime#2903]
                                 :     :  :  +- Relation [user_id#2895L,merchant_abn#2896L,dollar_value#2897,order_id#2898] parquet
                                 :     :  :- Project [user_id#2920L, merchant_abn#2921L, dollar_value#2922, order_id#2923, 2021-06-22 AS order_datetime#2928]
                                 :     :  :  +- Relation [user_id#2920L,merchant_abn#2921L,dollar_value#2922,order_id#2923] parquet
                                 :     :  :- Project [user_id#2945L, merchant_abn#2946L, dollar_value#2947, order_id#2948, 2021-06-23 AS order_datetime#2953]
                                 :     :  :  +- Relation [user_id#2945L,merchant_abn#2946L,dollar_value#2947,order_id#2948] parquet
                                 :     :  :- Project [user_id#2970L, merchant_abn#2971L, dollar_value#2972, order_id#2973, 2021-06-24 AS order_datetime#2978]
                                 :     :  :  +- Relation [user_id#2970L,merchant_abn#2971L,dollar_value#2972,order_id#2973] parquet
                                 :     :  :- Project [user_id#2995L, merchant_abn#2996L, dollar_value#2997, order_id#2998, 2021-06-25 AS order_datetime#3003]
                                 :     :  :  +- Relation [user_id#2995L,merchant_abn#2996L,dollar_value#2997,order_id#2998] parquet
                                 :     :  :- Project [user_id#3020L, merchant_abn#3021L, dollar_value#3022, order_id#3023, 2021-06-26 AS order_datetime#3028]
                                 :     :  :  +- Relation [user_id#3020L,merchant_abn#3021L,dollar_value#3022,order_id#3023] parquet
                                 :     :  :- Project [user_id#3045L, merchant_abn#3046L, dollar_value#3047, order_id#3048, 2021-06-27 AS order_datetime#3053]
                                 :     :  :  +- Relation [user_id#3045L,merchant_abn#3046L,dollar_value#3047,order_id#3048] parquet
                                 :     :  :- Project [user_id#3070L, merchant_abn#3071L, dollar_value#3072, order_id#3073, 2021-06-28 AS order_datetime#3078]
                                 :     :  :  +- Relation [user_id#3070L,merchant_abn#3071L,dollar_value#3072,order_id#3073] parquet
                                 :     :  :- Project [user_id#3095L, merchant_abn#3096L, dollar_value#3097, order_id#3098, 2021-06-29 AS order_datetime#3103]
                                 :     :  :  +- Relation [user_id#3095L,merchant_abn#3096L,dollar_value#3097,order_id#3098] parquet
                                 :     :  :- Project [user_id#3120L, merchant_abn#3121L, dollar_value#3122, order_id#3123, 2021-06-30 AS order_datetime#3128]
                                 :     :  :  +- Relation [user_id#3120L,merchant_abn#3121L,dollar_value#3122,order_id#3123] parquet
                                 :     :  :- Project [user_id#3145L, merchant_abn#3146L, dollar_value#3147, order_id#3148, 2021-07-01 AS order_datetime#3153]
                                 :     :  :  +- Relation [user_id#3145L,merchant_abn#3146L,dollar_value#3147,order_id#3148] parquet
                                 :     :  :- Project [user_id#3170L, merchant_abn#3171L, dollar_value#3172, order_id#3173, 2021-07-02 AS order_datetime#3178]
                                 :     :  :  +- Relation [user_id#3170L,merchant_abn#3171L,dollar_value#3172,order_id#3173] parquet
                                 :     :  :- Project [user_id#3195L, merchant_abn#3196L, dollar_value#3197, order_id#3198, 2021-07-03 AS order_datetime#3203]
                                 :     :  :  +- Relation [user_id#3195L,merchant_abn#3196L,dollar_value#3197,order_id#3198] parquet
                                 :     :  :- Project [user_id#3220L, merchant_abn#3221L, dollar_value#3222, order_id#3223, 2021-07-04 AS order_datetime#3228]
                                 :     :  :  +- Relation [user_id#3220L,merchant_abn#3221L,dollar_value#3222,order_id#3223] parquet
                                 :     :  :- Project [user_id#3245L, merchant_abn#3246L, dollar_value#3247, order_id#3248, 2021-07-05 AS order_datetime#3253]
                                 :     :  :  +- Relation [user_id#3245L,merchant_abn#3246L,dollar_value#3247,order_id#3248] parquet
                                 :     :  :- Project [user_id#3270L, merchant_abn#3271L, dollar_value#3272, order_id#3273, 2021-07-06 AS order_datetime#3278]
                                 :     :  :  +- Relation [user_id#3270L,merchant_abn#3271L,dollar_value#3272,order_id#3273] parquet
                                 :     :  :- Project [user_id#3295L, merchant_abn#3296L, dollar_value#3297, order_id#3298, 2021-07-07 AS order_datetime#3303]
                                 :     :  :  +- Relation [user_id#3295L,merchant_abn#3296L,dollar_value#3297,order_id#3298] parquet
                                 :     :  :- Project [user_id#3320L, merchant_abn#3321L, dollar_value#3322, order_id#3323, 2021-07-08 AS order_datetime#3328]
                                 :     :  :  +- Relation [user_id#3320L,merchant_abn#3321L,dollar_value#3322,order_id#3323] parquet
                                 :     :  :- Project [user_id#3345L, merchant_abn#3346L, dollar_value#3347, order_id#3348, 2021-07-09 AS order_datetime#3353]
                                 :     :  :  +- Relation [user_id#3345L,merchant_abn#3346L,dollar_value#3347,order_id#3348] parquet
                                 :     :  :- Project [user_id#3370L, merchant_abn#3371L, dollar_value#3372, order_id#3373, 2021-07-10 AS order_datetime#3378]
                                 :     :  :  +- Relation [user_id#3370L,merchant_abn#3371L,dollar_value#3372,order_id#3373] parquet
                                 :     :  :- Project [user_id#3395L, merchant_abn#3396L, dollar_value#3397, order_id#3398, 2021-07-11 AS order_datetime#3403]
                                 :     :  :  +- Relation [user_id#3395L,merchant_abn#3396L,dollar_value#3397,order_id#3398] parquet
                                 :     :  :- Project [user_id#3420L, merchant_abn#3421L, dollar_value#3422, order_id#3423, 2021-07-12 AS order_datetime#3428]
                                 :     :  :  +- Relation [user_id#3420L,merchant_abn#3421L,dollar_value#3422,order_id#3423] parquet
                                 :     :  :- Project [user_id#3445L, merchant_abn#3446L, dollar_value#3447, order_id#3448, 2021-07-13 AS order_datetime#3453]
                                 :     :  :  +- Relation [user_id#3445L,merchant_abn#3446L,dollar_value#3447,order_id#3448] parquet
                                 :     :  :- Project [user_id#3470L, merchant_abn#3471L, dollar_value#3472, order_id#3473, 2021-07-14 AS order_datetime#3478]
                                 :     :  :  +- Relation [user_id#3470L,merchant_abn#3471L,dollar_value#3472,order_id#3473] parquet
                                 :     :  :- Project [user_id#3495L, merchant_abn#3496L, dollar_value#3497, order_id#3498, 2021-07-15 AS order_datetime#3503]
                                 :     :  :  +- Relation [user_id#3495L,merchant_abn#3496L,dollar_value#3497,order_id#3498] parquet
                                 :     :  :- Project [user_id#3520L, merchant_abn#3521L, dollar_value#3522, order_id#3523, 2021-07-16 AS order_datetime#3528]
                                 :     :  :  +- Relation [user_id#3520L,merchant_abn#3521L,dollar_value#3522,order_id#3523] parquet
                                 :     :  :- Project [user_id#3545L, merchant_abn#3546L, dollar_value#3547, order_id#3548, 2021-07-17 AS order_datetime#3553]
                                 :     :  :  +- Relation [user_id#3545L,merchant_abn#3546L,dollar_value#3547,order_id#3548] parquet
                                 :     :  :- Project [user_id#3570L, merchant_abn#3571L, dollar_value#3572, order_id#3573, 2021-07-18 AS order_datetime#3578]
                                 :     :  :  +- Relation [user_id#3570L,merchant_abn#3571L,dollar_value#3572,order_id#3573] parquet
                                 :     :  :- Project [user_id#3595L, merchant_abn#3596L, dollar_value#3597, order_id#3598, 2021-07-19 AS order_datetime#3603]
                                 :     :  :  +- Relation [user_id#3595L,merchant_abn#3596L,dollar_value#3597,order_id#3598] parquet
                                 :     :  :- Project [user_id#3620L, merchant_abn#3621L, dollar_value#3622, order_id#3623, 2021-07-20 AS order_datetime#3628]
                                 :     :  :  +- Relation [user_id#3620L,merchant_abn#3621L,dollar_value#3622,order_id#3623] parquet
                                 :     :  :- Project [user_id#3645L, merchant_abn#3646L, dollar_value#3647, order_id#3648, 2021-07-21 AS order_datetime#3653]
                                 :     :  :  +- Relation [user_id#3645L,merchant_abn#3646L,dollar_value#3647,order_id#3648] parquet
                                 :     :  :- Project [user_id#3670L, merchant_abn#3671L, dollar_value#3672, order_id#3673, 2021-07-22 AS order_datetime#3678]
                                 :     :  :  +- Relation [user_id#3670L,merchant_abn#3671L,dollar_value#3672,order_id#3673] parquet
                                 :     :  :- Project [user_id#3695L, merchant_abn#3696L, dollar_value#3697, order_id#3698, 2021-07-23 AS order_datetime#3703]
                                 :     :  :  +- Relation [user_id#3695L,merchant_abn#3696L,dollar_value#3697,order_id#3698] parquet
                                 :     :  :- Project [user_id#3720L, merchant_abn#3721L, dollar_value#3722, order_id#3723, 2021-07-24 AS order_datetime#3728]
                                 :     :  :  +- Relation [user_id#3720L,merchant_abn#3721L,dollar_value#3722,order_id#3723] parquet
                                 :     :  :- Project [user_id#3745L, merchant_abn#3746L, dollar_value#3747, order_id#3748, 2021-07-25 AS order_datetime#3753]
                                 :     :  :  +- Relation [user_id#3745L,merchant_abn#3746L,dollar_value#3747,order_id#3748] parquet
                                 :     :  :- Project [user_id#3770L, merchant_abn#3771L, dollar_value#3772, order_id#3773, 2021-07-26 AS order_datetime#3778]
                                 :     :  :  +- Relation [user_id#3770L,merchant_abn#3771L,dollar_value#3772,order_id#3773] parquet
                                 :     :  :- Project [user_id#3795L, merchant_abn#3796L, dollar_value#3797, order_id#3798, 2021-07-27 AS order_datetime#3803]
                                 :     :  :  +- Relation [user_id#3795L,merchant_abn#3796L,dollar_value#3797,order_id#3798] parquet
                                 :     :  :- Project [user_id#3820L, merchant_abn#3821L, dollar_value#3822, order_id#3823, 2021-07-28 AS order_datetime#3828]
                                 :     :  :  +- Relation [user_id#3820L,merchant_abn#3821L,dollar_value#3822,order_id#3823] parquet
                                 :     :  :- Project [user_id#3845L, merchant_abn#3846L, dollar_value#3847, order_id#3848, 2021-07-29 AS order_datetime#3853]
                                 :     :  :  +- Relation [user_id#3845L,merchant_abn#3846L,dollar_value#3847,order_id#3848] parquet
                                 :     :  :- Project [user_id#3870L, merchant_abn#3871L, dollar_value#3872, order_id#3873, 2021-07-30 AS order_datetime#3878]
                                 :     :  :  +- Relation [user_id#3870L,merchant_abn#3871L,dollar_value#3872,order_id#3873] parquet
                                 :     :  :- Project [user_id#3895L, merchant_abn#3896L, dollar_value#3897, order_id#3898, 2021-07-31 AS order_datetime#3903]
                                 :     :  :  +- Relation [user_id#3895L,merchant_abn#3896L,dollar_value#3897,order_id#3898] parquet
                                 :     :  :- Project [user_id#3920L, merchant_abn#3921L, dollar_value#3922, order_id#3923, 2021-08-01 AS order_datetime#3928]
                                 :     :  :  +- Relation [user_id#3920L,merchant_abn#3921L,dollar_value#3922,order_id#3923] parquet
                                 :     :  :- Project [user_id#3945L, merchant_abn#3946L, dollar_value#3947, order_id#3948, 2021-08-02 AS order_datetime#3953]
                                 :     :  :  +- Relation [user_id#3945L,merchant_abn#3946L,dollar_value#3947,order_id#3948] parquet
                                 :     :  :- Project [user_id#3970L, merchant_abn#3971L, dollar_value#3972, order_id#3973, 2021-08-03 AS order_datetime#3978]
                                 :     :  :  +- Relation [user_id#3970L,merchant_abn#3971L,dollar_value#3972,order_id#3973] parquet
                                 :     :  :- Project [user_id#3995L, merchant_abn#3996L, dollar_value#3997, order_id#3998, 2021-08-04 AS order_datetime#4003]
                                 :     :  :  +- Relation [user_id#3995L,merchant_abn#3996L,dollar_value#3997,order_id#3998] parquet
                                 :     :  :- Project [user_id#4020L, merchant_abn#4021L, dollar_value#4022, order_id#4023, 2021-08-05 AS order_datetime#4028]
                                 :     :  :  +- Relation [user_id#4020L,merchant_abn#4021L,dollar_value#4022,order_id#4023] parquet
                                 :     :  :- Project [user_id#4045L, merchant_abn#4046L, dollar_value#4047, order_id#4048, 2021-08-06 AS order_datetime#4053]
                                 :     :  :  +- Relation [user_id#4045L,merchant_abn#4046L,dollar_value#4047,order_id#4048] parquet
                                 :     :  :- Project [user_id#4070L, merchant_abn#4071L, dollar_value#4072, order_id#4073, 2021-08-07 AS order_datetime#4078]
                                 :     :  :  +- Relation [user_id#4070L,merchant_abn#4071L,dollar_value#4072,order_id#4073] parquet
                                 :     :  :- Project [user_id#4095L, merchant_abn#4096L, dollar_value#4097, order_id#4098, 2021-08-08 AS order_datetime#4103]
                                 :     :  :  +- Relation [user_id#4095L,merchant_abn#4096L,dollar_value#4097,order_id#4098] parquet
                                 :     :  :- Project [user_id#4120L, merchant_abn#4121L, dollar_value#4122, order_id#4123, 2021-08-09 AS order_datetime#4128]
                                 :     :  :  +- Relation [user_id#4120L,merchant_abn#4121L,dollar_value#4122,order_id#4123] parquet
                                 :     :  :- Project [user_id#4145L, merchant_abn#4146L, dollar_value#4147, order_id#4148, 2021-08-10 AS order_datetime#4153]
                                 :     :  :  +- Relation [user_id#4145L,merchant_abn#4146L,dollar_value#4147,order_id#4148] parquet
                                 :     :  :- Project [user_id#4170L, merchant_abn#4171L, dollar_value#4172, order_id#4173, 2021-08-11 AS order_datetime#4178]
                                 :     :  :  +- Relation [user_id#4170L,merchant_abn#4171L,dollar_value#4172,order_id#4173] parquet
                                 :     :  :- Project [user_id#4195L, merchant_abn#4196L, dollar_value#4197, order_id#4198, 2021-08-12 AS order_datetime#4203]
                                 :     :  :  +- Relation [user_id#4195L,merchant_abn#4196L,dollar_value#4197,order_id#4198] parquet
                                 :     :  :- Project [user_id#4220L, merchant_abn#4221L, dollar_value#4222, order_id#4223, 2021-08-13 AS order_datetime#4228]
                                 :     :  :  +- Relation [user_id#4220L,merchant_abn#4221L,dollar_value#4222,order_id#4223] parquet
                                 :     :  :- Project [user_id#4245L, merchant_abn#4246L, dollar_value#4247, order_id#4248, 2021-08-14 AS order_datetime#4253]
                                 :     :  :  +- Relation [user_id#4245L,merchant_abn#4246L,dollar_value#4247,order_id#4248] parquet
                                 :     :  :- Project [user_id#4270L, merchant_abn#4271L, dollar_value#4272, order_id#4273, 2021-08-15 AS order_datetime#4278]
                                 :     :  :  +- Relation [user_id#4270L,merchant_abn#4271L,dollar_value#4272,order_id#4273] parquet
                                 :     :  :- Project [user_id#4295L, merchant_abn#4296L, dollar_value#4297, order_id#4298, 2021-08-16 AS order_datetime#4303]
                                 :     :  :  +- Relation [user_id#4295L,merchant_abn#4296L,dollar_value#4297,order_id#4298] parquet
                                 :     :  :- Project [user_id#4320L, merchant_abn#4321L, dollar_value#4322, order_id#4323, 2021-08-17 AS order_datetime#4328]
                                 :     :  :  +- Relation [user_id#4320L,merchant_abn#4321L,dollar_value#4322,order_id#4323] parquet
                                 :     :  :- Project [user_id#4345L, merchant_abn#4346L, dollar_value#4347, order_id#4348, 2021-08-18 AS order_datetime#4353]
                                 :     :  :  +- Relation [user_id#4345L,merchant_abn#4346L,dollar_value#4347,order_id#4348] parquet
                                 :     :  :- Project [user_id#4370L, merchant_abn#4371L, dollar_value#4372, order_id#4373, 2021-08-19 AS order_datetime#4378]
                                 :     :  :  +- Relation [user_id#4370L,merchant_abn#4371L,dollar_value#4372,order_id#4373] parquet
                                 :     :  :- Project [user_id#4395L, merchant_abn#4396L, dollar_value#4397, order_id#4398, 2021-08-20 AS order_datetime#4403]
                                 :     :  :  +- Relation [user_id#4395L,merchant_abn#4396L,dollar_value#4397,order_id#4398] parquet
                                 :     :  :- Project [user_id#4420L, merchant_abn#4421L, dollar_value#4422, order_id#4423, 2021-08-21 AS order_datetime#4428]
                                 :     :  :  +- Relation [user_id#4420L,merchant_abn#4421L,dollar_value#4422,order_id#4423] parquet
                                 :     :  :- Project [user_id#4445L, merchant_abn#4446L, dollar_value#4447, order_id#4448, 2021-08-22 AS order_datetime#4453]
                                 :     :  :  +- Relation [user_id#4445L,merchant_abn#4446L,dollar_value#4447,order_id#4448] parquet
                                 :     :  :- Project [user_id#4470L, merchant_abn#4471L, dollar_value#4472, order_id#4473, 2021-08-23 AS order_datetime#4478]
                                 :     :  :  +- Relation [user_id#4470L,merchant_abn#4471L,dollar_value#4472,order_id#4473] parquet
                                 :     :  :- Project [user_id#4495L, merchant_abn#4496L, dollar_value#4497, order_id#4498, 2021-08-24 AS order_datetime#4503]
                                 :     :  :  +- Relation [user_id#4495L,merchant_abn#4496L,dollar_value#4497,order_id#4498] parquet
                                 :     :  :- Project [user_id#4520L, merchant_abn#4521L, dollar_value#4522, order_id#4523, 2021-08-25 AS order_datetime#4528]
                                 :     :  :  +- Relation [user_id#4520L,merchant_abn#4521L,dollar_value#4522,order_id#4523] parquet
                                 :     :  :- Project [user_id#4545L, merchant_abn#4546L, dollar_value#4547, order_id#4548, 2021-08-26 AS order_datetime#4553]
                                 :     :  :  +- Relation [user_id#4545L,merchant_abn#4546L,dollar_value#4547,order_id#4548] parquet
                                 :     :  +- Project [user_id#4570L, merchant_abn#4571L, dollar_value#4572, order_id#4573, 2021-08-27 AS order_datetime#4578]
                                 :     :     +- Relation [user_id#4570L,merchant_abn#4571L,dollar_value#4572,order_id#4573] parquet
                                 :     +- Relation [user_id#6873L,consumer_id#6874L] parquet
                                 +- Relation [name#17,address#18,state#19,postcode#20,gender#21,consumer_id#22] csv


In [None]:
customer_transaction_count_mean.createOrReplaceTempView("customer_transaction_count_mean")


customer_transaction_count_mean = spark.sql(
    """
    Select  user_id, merchant_abn,
    avg(count_user_id) as mean_visited_user 

    from customer_transaction_count_mean
    group by user_id, merchant_abn 
    order by  mean_visited_user desc
    """)
customer_transaction_count_mean

In [29]:
vip = join.withColumn("VIP", \
   when((join.count_user_id > join.mean_visited_user), lit("1").cast(StringType())) \
     .otherwise(lit("0").cast(StringType())) \
  )
vip

user_id,merchant_abn,count_user_id,merchant_abn.1,mean_visited_user,VIP
21223,49891706470,11,49891706470,2.843536198184501,1
5024,43186523025,12,43186523025,2.426085320539956,1
1120,45629217853,11,45629217853,2.640505638413969,1
1782,45629217853,11,45629217853,2.640505638413969,1
8009,86578477987,11,86578477987,3.060587461639632,1
11998,86578477987,11,86578477987,3.060587461639632,1
8951,86578477987,11,86578477987,3.060587461639632,1
8633,86578477987,11,86578477987,3.060587461639632,1
21272,86578477987,11,86578477987,3.060587461639632,1
9928,64203420245,11,64203420245,2.958622518898369,1


In [30]:
vip.createOrReplaceTempView("vip")


vip_count = spark.sql(
    """
    Select   merchant_abn,
    sum(VIP) as sum_vip 

    from vip
    group by merchant_abn 
    order by  sum_vip desc
    """)
vip_count

AnalysisException: Reference 'merchant_abn' is ambiguous, could be: vip.merchant_abn, vip.merchant_abn.; line 6 pos 13