In [1]:
import pandas as pd
from functools import reduce

In [2]:
# read data
num_consumer = pd.read_csv('../data/curated/pred_total_num_consumer.csv')
num_transaction = pd.read_csv('../data/curated/pred_total_num_transaction.csv')
revenue = pd.read_csv('../data/curated/pred_total_revenue.csv')
merchant_info = pd.read_csv('../data/curated/merchant.csv')

# merge three data frames based on merchant abn
data_frames = [merchant_info, num_consumer, num_transaction, revenue]
df_merged = reduce(lambda left,right: pd.merge(left,right,on=['merchant_abn'], how='outer'), data_frames)
df_merged

Unnamed: 0,merchant_abn,name,tags,revenue_level,take_rate,y_pred_total_num_consumer,y_pred_total_num_transaction,y_pred_total_revenue
0,10023283211,Felis Limited,furniture,e,0.18,2130.976342,2215.326353,6.306584e+05
1,10142254217,Arcu Ac Orci Corporation,cable,b,4.22,1904.673169,1983.119759,3.104424e+05
2,10165489824,Nunc Sed Company,jewelry,b,4.40,,,
3,10187291046,Ultricies Dignissim Lacus Foundation,watch,b,3.29,232.572609,231.071404,8.055615e+04
4,10192359162,Enim Condimentum PC,music,a,6.33,242.355650,239.045462,6.899121e+05
...,...,...,...,...,...,...,...,...
4021,99938978285,Elit Dictum Eu Ltd,opticians,b,4.50,9311.509743,11823.555676,1.513780e+06
4022,99974311662,Mollis LLP,books,b,3.17,97.286540,101.514524,9.482091e+04
4023,99976658299,Sociosqu Corp.,shoe,a,6.57,11449.076286,15846.420171,1.621151e+07
4024,99987905597,Commodo Hendrerit LLC,motor,a,6.82,101.807210,106.359865,1.947298e+05


In [3]:
# impute missing values and negative values with zero
df_merged = df_merged.fillna(0)
num = df_merged._get_numeric_data()
num[num < 0] = 0
df_merged

Unnamed: 0,merchant_abn,name,tags,revenue_level,take_rate,y_pred_total_num_consumer,y_pred_total_num_transaction,y_pred_total_revenue
0,10023283211,Felis Limited,furniture,e,0.18,2130.976342,2215.326353,6.306584e+05
1,10142254217,Arcu Ac Orci Corporation,cable,b,4.22,1904.673169,1983.119759,3.104424e+05
2,10165489824,Nunc Sed Company,jewelry,b,4.40,0.000000,0.000000,0.000000e+00
3,10187291046,Ultricies Dignissim Lacus Foundation,watch,b,3.29,232.572609,231.071404,8.055615e+04
4,10192359162,Enim Condimentum PC,music,a,6.33,242.355650,239.045462,6.899121e+05
...,...,...,...,...,...,...,...,...
4021,99938978285,Elit Dictum Eu Ltd,opticians,b,4.50,9311.509743,11823.555676,1.513780e+06
4022,99974311662,Mollis LLP,books,b,3.17,97.286540,101.514524,9.482091e+04
4023,99976658299,Sociosqu Corp.,shoe,a,6.57,11449.076286,15846.420171,1.621151e+07
4024,99987905597,Commodo Hendrerit LLC,motor,a,6.82,101.807210,106.359865,1.947298e+05


### Ranking System
The ranking system utilises predicted "total number of consumers", "total number of transactions" and "total revenue" next year and give each merchant a score within 0-100. The ranking score is calculated as follows:

1. Standardise each attribute using min-max normalization 
2. Total number of consumers * 30%
3. BNPL revenue * 40%
4. Total number of transactions * 30%

Revenue accounts for a larger proportion because it is considered to be an important thing to the BNPL company.


In [4]:
# min-max normalization
features = ['pred_total_num_consumer', 'pred_total_num_transaction', 'pred_total_revenue']
for col in features:
    df_merged[f'scaled_{col}'] = 100 * (df_merged[col] - df_merged[col].min()) / (df_merged[col].max() - df_merged[col].min())    

df_merged.head()

KeyError: 'pred_total_num_consumer'

In [None]:
# calculate ranking score for each merchant
df_merged['score'] = 0.3*df_merged['scaled_pred_total_num_consumer'] + 0.3*df_merged['scaled_pred_total_num_transaction'] + 0.4*df_merged['scaled_pred_total_revenue']

df_merged['rank'] = df_merged['score'].rank(ascending=False)
df_merged = df_merged.set_index('rank').sort_index()
df_merged.head()

Unnamed: 0_level_0,merchant_abn,name,tags,revenue_level,take_rate,pred_total_num_consumer,pred_total_num_transaction,pred_total_revenue,scaled_pred_total_num_consumer,scaled_pred_total_num_transaction,scaled_pred_total_revenue,score
rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1.0,86578477987,Leo In Consulting,watch,a,6.43,18424.119848,187019.058114,42948920.0,82.008301,93.866355,100.0,92.762397
2.0,45629217853,Lacus Consulting,gift,a,6.98,20247.863835,152088.705358,41344550.0,90.126037,76.334533,96.264467,88.443957
3.0,89726005175,Est Nunc Consulting,tent,a,6.01,20487.094245,148292.570591,37103500.0,91.190884,74.429222,86.389832,84.241965
4.0,49891706470,Non Vestibulum Industries,tent,a,5.8,19617.385531,169851.702773,29164610.0,87.319691,85.249922,67.905351,78.933024
5.0,21439773999,Mauris Non Institute,cable,a,6.1,22427.261712,81994.383461,36715890.0,99.826838,41.153634,85.487345,76.48908


### Split Merchants into 4 Segments
Based on [Merchant Cateogry Groups by ANZ](https://www.anz.com/Documents/Business/CommercialCard/Merchant_cateogry_codes_control.pdf), we devide all merchant into 4 categories.

1. Health service: health, optician

2. Recreational good retailing: bicycle, books, stationary, hobby, tent, digital goods, 

3. Personal & household good retail: antique, watch, jewellery, music, artist supply, gift, art dealer, florists, furniture, shoe, garden supply, 

4. Technical & machinery service: cable, telecom, computer, equipment, motor

In [None]:
# split merchants into 4 segments
segment = {
    'furniture': 'personal & household good retail',
    'cable': 'technical & machinery service',
    'watch': 'personal & household good retail',
    'music': 'personal & household good retail',
    'gift': 'personal & household good retail',
    'computer': 'technical & machinery service',
    'equipment': 'technical & machinery service',
    'artist supply': 'personal & household good retail',
    'florists': 'personal & household good retail',
    'motor': 'technical & machinery service',
    'books': 'recreational good retailing',
    'jewelry': 'personal & household good retail',
    'stationery': 'recreational good retailing',
    'tent': 'recreational good retailing',
    'art dealer': 'personal & household good retail',
    'bicycle': 'recreational good retailing',
    'digital goods': 'recreational good retailing',
    'shoe': 'personal & household good retail',
    'opticians': 'health service',
    'antique': 'personal & household good retail',
    'health': 'health service',
    'hobby': 'recreational good retailing',
    'garden supply': 'personal & household good retail',
    'telecom': 'technical & machinery service'
}

df_merged['segment'] = df_merged['tags'].map(segment)

In [None]:
# find top 100 merchants overall
top100 = df_merged.loc[df_merged.index <= 100]
top100.to_csv('../data/curated/top100.csv')

In [None]:
# find top 10 merchants in each segment
df_merged.loc[df_merged['segment']=='personal & household good retail'].head(10)

Unnamed: 0_level_0,merchant_abn,name,tags,revenue_level,take_rate,pred_total_num_consumer,pred_total_num_transaction,pred_total_revenue,scaled_pred_total_num_consumer,scaled_pred_total_num_transaction,scaled_pred_total_revenue,score,segment
rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1.0,86578477987,Leo In Consulting,watch,a,6.43,18424.119848,187019.058114,42948920.0,82.008301,93.866355,100.0,92.762397,personal & household good retail
2.0,45629217853,Lacus Consulting,gift,a,6.98,20247.863835,152088.705358,41344550.0,90.126037,76.334533,96.264467,88.443957,personal & household good retail
6.0,64403598239,Lobortis Ultrices Company,music,a,6.31,22466.164669,77955.86097,36496220.0,100.0,39.126668,84.975878,75.728351,personal & household good retail
7.0,43186523025,Lorem Ipsum Sodales Industries,florists,b,4.47,21202.341117,138223.041477,28205880.0,94.374547,69.375245,65.673087,75.394173,personal & household good retail
8.0,24852446429,Erat Vitae LLP,florists,c,2.94,18397.0508,199239.715252,18467900.0,81.887812,100.0,42.999696,71.766222,personal & household good retail
9.0,32361057556,Orci In Consequat Corporation,gift,a,6.61,21585.405795,58492.62482,35861280.0,96.079621,29.357914,83.497511,71.030265,personal & household good retail
10.0,94493496784,Dictum Phasellus In Institute,gift,a,5.65,22246.385803,67378.171226,32377610.0,99.021734,33.817641,75.386324,70.006342,personal & household good retail
13.0,79417999332,Phasellus At Company,gift,b,4.95,22391.675691,67990.5389,29203880.0,99.668439,34.124993,67.996787,67.336744,personal & household good retail
14.0,63290521567,Vehicula Pellentesque Corporation,artist supply,a,6.48,22137.241349,119974.735676,20020430.0,98.535917,60.216275,46.614523,66.271467,personal & household good retail
17.0,60956456424,Ultricies Dignissim LLP,gift,b,4.69,22142.323584,62221.579534,26097620.0,98.558539,31.229506,60.76433,63.242146,personal & household good retail


In [None]:
df_merged.loc[df_merged['segment']=='technical & machinery service'].head(10)

Unnamed: 0_level_0,merchant_abn,name,tags,revenue_level,take_rate,pred_total_num_consumer,pred_total_num_transaction,pred_total_revenue,scaled_pred_total_num_consumer,scaled_pred_total_num_transaction,scaled_pred_total_revenue,score,segment
rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
5.0,21439773999,Mauris Non Institute,cable,a,6.1,22427.261712,81994.383461,36715890.0,99.826838,41.153634,85.487345,76.48908,technical & machinery service
18.0,68216911708,Placerat Eget Venenatis Limited,computer,c,3.05,22179.480322,123795.665204,14821900.0,98.723928,62.13403,34.51052,62.061596,technical & machinery service
23.0,96680767841,Ornare Limited,motor,a,5.91,13358.259421,20437.638529,32805360.0,59.459457,10.257814,76.382273,51.46809,technical & machinery service
24.0,35909341340,Arcu Sed Eu Incorporated,computer,b,4.8,15347.860798,25516.43923,27011890.0,68.315447,12.806904,62.893064,49.493931,technical & machinery service
29.0,67400260923,Eleifend PC,computer,a,5.97,15703.501465,25799.417587,23661310.0,69.898453,12.948933,55.091745,46.890914,technical & machinery service
31.0,94690988633,Eu Placerat LLC,computer,a,6.16,13895.359162,21106.140391,25066590.0,61.850162,10.59334,58.363738,45.078546,technical & machinery service
35.0,58454491168,Diam At Foundation,computer,a,6.01,12295.206852,17815.299684,25534720.0,54.727663,8.941641,59.453696,42.882269,technical & machinery service
39.0,80518954462,Neque Sed Dictum Incorporated,computer,b,3.49,13433.743004,19968.557971,20050350.0,59.795444,10.022378,46.684179,39.619018,technical & machinery service
49.0,17488304283,Posuere Cubilia Curae Corporation,cable,a,6.18,14833.881547,23201.967296,14058790.0,66.027654,11.645252,32.733757,36.395375,technical & machinery service
50.0,13514558491,Magna Praesent PC,motor,a,6.78,9261.062218,12024.158499,23731920.0,41.222266,6.035021,55.256144,36.279644,technical & machinery service


In [None]:
df_merged.loc[df_merged['segment']=='recreational good retailing'].head(10)

Unnamed: 0_level_0,merchant_abn,name,tags,revenue_level,take_rate,pred_total_num_consumer,pred_total_num_transaction,pred_total_revenue,scaled_pred_total_num_consumer,scaled_pred_total_num_transaction,scaled_pred_total_revenue,score,segment
rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
3.0,89726005175,Est Nunc Consulting,tent,a,6.01,20487.094245,148292.570591,37103500.0,91.190884,74.429222,86.389832,84.241965,recreational good retailing
4.0,49891706470,Non Vestibulum Industries,tent,a,5.8,19617.385531,169851.702773,29164610.0,87.319691,85.249922,67.905351,78.933024,recreational good retailing
11.0,72472909171,Nullam Consulting,digital goods,a,6.33,22452.125399,69864.891981,30769820.0,99.937509,35.065746,71.642824,69.158106,recreational good retailing
12.0,64203420245,Pede Nonummy Corp.,tent,c,2.86,19445.345139,179730.272883,15973630.0,86.553915,90.208055,37.192157,67.905454,recreational good retailing
20.0,98973094975,Ornare Fusce Inc.,hobby,a,5.98,18805.618554,37234.745984,28016620.0,83.706404,18.688416,65.232419,56.811413,recreational good retailing
22.0,49505931725,Suspendisse Ac Associates,digital goods,b,4.7,20700.055683,46964.302731,23170020.0,92.138805,23.571758,53.947846,56.292307,recreational good retailing
26.0,35223308778,Euismod In Corp.,books,b,4.19,17267.513531,30539.425023,22102230.0,76.860086,15.327981,51.461669,48.241088,recreational good retailing
37.0,57900494384,Porttitor Tellus Corporation,tent,a,6.39,20777.940056,45433.031962,5896677.0,92.485479,22.803201,13.729512,40.078409,recreational good retailing
38.0,91923722701,Euismod Urna Institute,tent,b,5.05,21554.269116,50434.799116,3935804.0,95.941027,25.313627,9.163919,40.041964,recreational good retailing
41.0,96152467973,Rhoncus Donec Associates,tent,b,4.45,20950.436589,46434.965454,4368107.0,93.253285,23.306079,10.170469,39.035997,recreational good retailing


In [None]:
df_merged.loc[df_merged['segment']=='health service'].head(10)

Unnamed: 0_level_0,merchant_abn,name,tags,revenue_level,take_rate,pred_total_num_consumer,pred_total_num_transaction,pred_total_revenue,scaled_pred_total_num_consumer,scaled_pred_total_num_transaction,scaled_pred_total_revenue,score,segment
rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
15.0,48534649627,Dignissim Maecenas Foundation,opticians,a,6.64,19815.016496,43727.399132,35482140.0,88.199374,21.94713,82.614752,66.089852,health service
16.0,46804135891,Suspendisse Dui Corporation,opticians,c,2.93,20658.123723,156457.125445,14943230.0,91.95216,78.527077,34.793033,65.060984,health service
76.0,11566786699,Euismod Et Institute,opticians,c,2.62,17236.981577,29767.087733,2279885.0,76.724184,14.940338,5.308365,29.622703,health service
81.0,41251795489,Ultricies Sem Limited,opticians,c,2.91,16477.506055,27195.162539,2258437.0,73.343654,13.649469,5.258426,28.201307,health service
92.0,18158387243,Nec Tellus Ltd,health,c,2.03,8614.577745,10809.436463,13452390.0,38.344675,5.425342,31.321827,25.659736,health service
99.0,95574756848,At Pede Inc.,opticians,a,6.15,6440.832791,7590.71519,15581790.0,28.669036,3.80984,36.279811,24.255587,health service
101.0,22059270846,Montes Nascetur Ridiculus Limited,opticians,a,6.59,13327.032188,19118.854624,3533241.0,59.32046,9.595905,8.226612,23.965554,health service
102.0,88699453206,Sed Nec Inc.,health,b,3.53,6784.287852,8104.475549,14555210.0,30.197802,4.067701,33.889586,23.835485,health service
113.0,11237511112,Magna Institute,opticians,c,2.11,10727.462619,14281.054137,6214697.0,47.749417,7.167775,14.469974,22.263147,health service
118.0,81410315303,Sed Dictum PC,opticians,a,6.35,11610.838917,15971.040165,4277344.0,51.681447,8.015992,9.959142,21.892888,health service
