In [1]:
import pandas as pd
import numpy as np

In [2]:
# Read the curated merchant table, then discard the 'Unnamed: 0' column
# (presumably a row index written out by an earlier to_csv -- TODO confirm).
full = pd.read_csv('../data/curated/full_no_missing.csv')
full = full.drop(columns=['Unnamed: 0'])
# Show the column dtypes as a quick schema check.
full.dtypes

merchant_abn                    int64
total_num_consumer              int64
avg_dollar_value              float64
total_num_transaction           int64
mean_income                   float64
revenue_level                  object
total_revenue                 float64
total_num_postcode              int64
tag                            object
next_total_num_consumer       float64
next_total_revenue            float64
next_total_num_transaction    float64
dtype: object

### Mapping tags
All tags are grouped into 4 categories:
    1. Health service: health, opticians

    2. Recreational good retailing: bicycle, books, stationery, hobby, tent, digital goods

    3. Personal & household good retail: antique, watch, jewelry, music, artist supply, gift, art dealer, florists, furniture, shoe, garden supply

    4. Technical & machinery service: cable, telecom, computer, equipment, motor

In [3]:
# List the distinct raw tag values present in the data.
full.loc[:, 'tag'].unique()

array(['furniture', 'cable', 'watch', 'music', 'gift', 'computer',
       'equipment', 'artist supply', 'florists', 'motor', 'books',
       'jewelry', 'stationery', 'tent', 'art dealer', 'bicycle',
       'digital goods', 'shoe', 'opticians', 'antique', 'health', 'hobby',
       'garden supply', 'telecom'], dtype=object)

In [4]:
# Reduce the number of tags from 24 to 4 by mapping each raw tag to a category.

tags = {
    'furniture': 'Personal & household good retail',
    'cable': 'Technical & machinery service',
    'watch': 'Personal & household good retail',
    'music': 'Personal & household good retail',
    'gift': 'Personal & household good retail',
    'computer': 'Technical & machinery service',
    'equipment': 'Technical & machinery service',
    'artist supply': 'Personal & household good retail',
    'florists': 'Personal & household good retail',
    'motor': 'Technical & machinery service',
    'books': 'Recreational good retailing',
    'jewelry': 'Personal & household good retail',
    'stationery': 'Recreational good retailing',
    'tent': 'Recreational good retailing',
    'art dealer': 'Personal & household good retail',
    'bicycle': 'Recreational good retailing',
    'digital goods': 'Recreational good retailing',
    'shoe': 'Personal & household good retail',
    'opticians': 'Health service',
    'antique': 'Personal & household good retail',
    'health': 'Health service',
    'hobby': 'Recreational good retailing',
    'garden supply': 'Personal & household good retail',
    'telecom': 'Technical & machinery service'
}

# Category names map to themselves so that re-running this cell on an
# already-mapped column is a no-op instead of turning every tag into NaN.
tag_lookup = {**tags, **{category: category for category in set(tags.values())}}

# Series.map yields NaN for any value missing from the dict; fail loudly
# instead so a new or misspelled tag cannot silently drop out of the ranking.
unmapped_tags = sorted(set(full['tag'].dropna()) - set(tag_lookup), key=str)
if unmapped_tags:
    raise ValueError(f'tags without a category mapping: {unmapped_tags}')

full['tag'] = full['tag'].map(tag_lookup)

## Ranking System

1. Only the predicted future features (total number of consumers, total revenue, and total number of transactions) are considered in the ranking system. 

2. Each merchant, identified by its unique merchant ABN, is given a score from 0 to 100. 

3. Tags are also included because a top-100 list is provided for each tag.

In [5]:
# Keep only the identifier, the category tag and the three score inputs.
# NOTE(review): 'total_num_consumer' is the current-period count, while the
# other two inputs are the predicted 'next_*' columns; the loaded data also has
# 'next_total_num_consumer' -- confirm which one the ranking is meant to use.
features = [
    'merchant_abn',
    'tag',
    'total_num_consumer',
    'next_total_revenue',
    'next_total_num_transaction',
]
full = full.loc[:, features]
full.dtypes

merchant_abn                    int64
tag                            object
total_num_consumer              int64
next_total_revenue            float64
next_total_num_transaction    float64
dtype: object

In [6]:
# Raw score = 30% consumers + 40% BNPL revenue + 30% transactions; revenue gets
# the largest weight because it is considered the most important part.
# NOTE(review): the inputs are combined on their raw scales, and in this data
# next_total_revenue is orders of magnitude larger than the two count columns,
# so it dominates the sum. Consider normalising each input first if the
# 30/40/30 split is meant literally.
consumer_part = 0.3*full['total_num_consumer']
revenue_part = 0.4*full['next_total_revenue']
transaction_part = 0.3*full['next_total_num_transaction']
full['score'] = consumer_part + revenue_part + transaction_part

# Inspect the raw range before rescaling.
full['score'].min(), full['score'].max()

(36.43659363579862, 574047759026.7616)

In [7]:
# Min-max rescale the raw score onto 0-100: the lowest raw score maps to 0 and
# the highest to 100.
score_min = full['score'].min()
score_range = full['score'].max() - score_min

# Guard the degenerate case where every raw score is equal (e.g. a single
# merchant): dividing by a zero range would give an infinite factor and an
# all-NaN column. With k = 0 every merchant gets a scaled score of 0 instead.
k = 100/score_range if score_range != 0 else 0.0
full['score_scaled'] = k*(full['score'] - score_min)
full

Unnamed: 0,merchant_abn,tag,total_num_consumer,next_total_revenue,next_total_num_transaction,score,score_scaled
0,10023283211,Personal & household good retail,808,3.894029e+04,1002.0,1.611912e+04,2.801628e-06
1,10142254217,Technical & machinery service,731,1.544679e+05,925.0,6.228397e+04,1.084362e-05
2,10187291046,Personal & household good retail,87,4.168321e+04,100.0,1.672938e+04,2.907937e-06
3,10192359162,Personal & household good retail,107,2.900710e+05,107.0,1.160926e+05,2.021716e-05
4,10206519221,Personal & household good retail,2244,6.667664e+05,2811.0,2.682231e+05,4.671852e-05
...,...,...,...,...,...,...,...
3948,99938978285,Health service,3920,7.204531e+05,5104.0,2.908884e+05,5.066686e-05
3949,99974311662,Recreational good retailing,33,3.991901e+04,40.0,1.598950e+04,2.779048e-06
3950,99976658299,Personal & household good retail,5353,6.974851e+06,7063.0,2.793665e+06,4.866544e-04
3951,99987905597,Technical & machinery service,45,1.762053e+05,69.0,7.051630e+04,1.227770e-05


### Total Rank

In [8]:
# Rank 1 goes to the highest scaled score (ascending=False). rank() returns
# floats and, by default, gives tied scores the average of their ranks.
# Note: the 'score_rank' column is added to `full` itself.
full['score_rank'] = full['score_scaled'].rank(ascending=False)

# Index by rank and sort on it so the best-ranked merchant comes first.
full_score_rank = full.set_index('score_rank').sort_index()
full_score_rank

Unnamed: 0_level_0,merchant_abn,tag,total_num_consumer,next_total_revenue,next_total_num_transaction,score,score_scaled
score_rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1.0,15043504837,Personal & household good retail,29,1.435008e+12,1.481226e+08,5.740478e+11,1.000000e+02
2.0,44345785419,Personal & household good retail,12,9.587770e+11,9.893958e+07,3.835405e+11,6.681334e+01
3.0,57860746842,Personal & household good retail,23,8.021699e+11,8.282366e+07,3.208928e+11,5.590002e+01
4.0,83199298021,Personal & household good retail,10,7.202257e+11,7.432607e+07,2.881126e+11,5.018966e+01
5.0,54860127682,Personal & household good retail,5,5.169637e+11,5.334227e+07,2.068015e+11,3.602513e+01
...,...,...,...,...,...,...,...
3949.0,55622580330,Recreational good retailing,8,3.266833e+02,1.000000e+01,1.360733e+02,1.735687e-08
3950.0,48699038894,Technical & machinery service,2,3.250766e+02,1.000000e+00,1.309306e+02,1.646100e-08
3951.0,82329510503,Technical & machinery service,2,1.850999e+02,1.000000e+00,7.493995e+01,6.707343e-09
3952.0,69893361647,Personal & household good retail,4,1.465627e+02,1.000000e+00,6.012507e+01,4.126569e-09


In [9]:
# Persist the ranked table; 'score_rank' is the index, so it is written as the
# first CSV column.
full_score_rank.to_csv(path_or_buf='../data/curated/full_score_rank.csv')