In [65]:
import pandas as pd
import numpy as np

In [66]:
merchant_fraud_df = pd.read_csv("../data/curated/merchant_detail_with_fraud_prob.csv")
merchant_growth_df = pd.read_csv("../data/curated/merchant_detail_with_monthly_growth.csv")

In [67]:
merchant_detail = merchant_fraud_df.merge(merchant_growth_df, on = 'merchant_abn', how = 'inner')\
                                    [['merchant_abn', 'merchant_name_x','type_x', 'fraud_probability', 'monthly_profit_growth', 'monthly_profit_x']]\
                                    .rename(columns = {'merchant_name_x':'merchant_name', 'monthly_profit_x':'monthly_profit', 'type_x' : 'type'})

We aim to find the merchant with higher profit and better development potential, so in our ranking system, there are three variables: fraud probability, monthly profit growth and monthly profit.

Assume the grwoth rate and fraud probability will not change for the next year, we will try to estimate the expected monthly profit after one year as follows:

                Estimated profit = (monthly_profit  + 12 * monthly_profit_growth) * (1-fraud_probability)

In [68]:
merchant_detail['estimated profit'] = (merchant_detail['monthly_profit'] + 12 * merchant_detail['monthly_profit_growth'])*\
                                      (1 - merchant_detail['fraud_probability']/100)

In [69]:
merchant_detail.sort_values('estimated profit', ascending = False)

Unnamed: 0,merchant_abn,merchant_name,type,fraud_probability,monthly_profit_growth,monthly_profit,estimated profit
3,86578477987,Leo In Consulting,"watch, clock, and jewelry repair shops",26.106311,386.294528,30161.0,25712.443039
1,48534649627,Dignissim Maecenas Foundation,"opticians, optical goods, and eyeglasses",28.567933,423.618912,30680.0,25546.555107
2,32361057556,Orci In Consequat Corporation,"gift, card, novelty, and souvenir shops",28.107352,373.577904,30626.0,25240.743106
0,79827781481,Amet Risus Inc.,"furniture, home furnishings and equipment shop...",29.735159,259.730041,32801.0,25237.557189
5,45629217853,Lacus Consulting,"gift, card, novelty, and souvenir shops",27.141809,378.858105,28839.0,24323.923751
...,...,...,...,...,...,...,...
3056,25781502446,Lectus Pede Limited,"antique shops - sales, repairs, and restoratio...",76.409104,-13.663653,68.0,-22.638729
2502,29027010572,Ut Mi Associates,"jewelry, watch, clock, and silverware shops",69.521138,-16.395932,121.0,-23.088100
1185,21986309398,Ut Dolor Consulting,"antique shops - sales, repairs, and restoratio...",61.560946,-73.541845,807.0,-29.022307
1536,53877856360,Sem Magna Company,"antique shops - sales, repairs, and restoratio...",67.727133,-62.877455,493.0,-84.403056


Find the most profitable merchants for different merchants:

In [70]:
Technology_and_Equipment = ['opticians, optical goods, and eyeglasses',\
                           'furniture, home furnishings and equipment shops, and manufacturers, except appliances',\
                           'telecom',\
                           'computers, computer peripheral equipment, and software'\
                           'equipment, tool, furniture, and appliance rent al and leasing']

Hobbies = ['digital goods: books, movies, music',\
           'art dealers and galleries',\
           'tent and awning shops',\
           'artist supply and craft shops',\
           'hobby, toy and game shops ',\
           'music shops - musical instruments, pianos, and sheet music']

Luxury = ['antique shops - sales, repairs, and restoration services',\
          'jewelry, watch, clock, and silverware shops',\
          'motor vehicle supplies and new parts',\
          'bicycle shops - sales and service']

Groceries = ['florists supplies, nursery stock, and flowers',\
             'books, periodicals, and newspapers',\
             'gift, card, novelty, and souvenir shops',\
             'stationery, office supplies and printing and writing paper',\
             'shoe shops']

Service = ['cable, satellite, and other pay television and radio services',\
           'watch, clock, and jewelry repair shops',\
           'computer programming , data processing, and integrated systems design services',\
           'lawn and garden supply outlets, including nurseries',\
           'health and beauty spas']

In [71]:
def get_segment(df, segment):
    segment_df = pd.DataFrame()
    for atype in segment: 
        segment_df = pd.concat([segment_df, merchant_detail[merchant_detail['type'] == atype]], ignore_index=True, sort=False)
    return segment_df

In [72]:
Technology_and_Equipment_df = get_segment(merchant_detail, Technology_and_Equipment).sort_values('estimated profit', ascending = False)
Technology_and_Equipment_df.head(10)

Unnamed: 0,merchant_abn,merchant_name,type,fraud_probability,monthly_profit_growth,monthly_profit,estimated profit
0,48534649627,Dignissim Maecenas Foundation,"opticians, optical goods, and eyeglasses",28.567933,423.618912,30680.0,25546.555107
151,79827781481,Amet Risus Inc.,"furniture, home furnishings and equipment shop...",29.735159,259.730041,32801.0,25237.557189
333,82368304209,Nec Incorporated,telecom,31.472067,437.164066,26392.0,21680.846148
152,76767266140,Phasellus At Limited,"furniture, home furnishings and equipment shop...",29.539733,262.467067,21329.0,17247.690248
153,38090089066,Interdum Feugiat Sed Inc.,"furniture, home furnishings and equipment shop...",29.286271,197.185227,14162.0,11687.72265
334,93260930990,Pede Cras Vulputate Ltd,telecom,32.261675,175.00065,13813.0,10779.20502
154,90543168331,Phasellus Dapibus Incorporated,"furniture, home furnishings and equipment shop...",29.881072,157.633354,11872.0,9650.888913
1,95574756848,At Pede Inc.,"opticians, optical goods, and eyeglasses",36.464194,99.442996,12366.0,8615.020727
2,46804135891,Suspendisse Dui Corporation,"opticians, optical goods, and eyeglasses",28.18901,131.582755,10128.0,8406.907558
155,35809331583,Euismod In LLC,"furniture, home furnishings and equipment shop...",41.861356,172.475253,9418.0,6678.794814


In [73]:
Hobbies_df = get_segment(merchant_detail, Hobbies).sort_values('estimated profit', ascending = False)
Hobbies_df.head(10)

Unnamed: 0,merchant_abn,merchant_name,type,fraud_probability,monthly_profit_growth,monthly_profit,estimated profit
300,38700038932,Etiam Bibendum Industries,tent and awning shops,32.140727,407.92607,29735.0,23499.742704
669,64403598239,Lobortis Ultrices Company,"music shops - musical instruments, pianos, and...",28.020586,328.763921,27493.0,22629.008307
301,89726005175,Est Nunc Consulting,tent and awning shops,26.950795,344.695067,26296.0,22230.582994
477,63123845164,Odio Phasellus Institute,artist supply and craft shops,32.055305,337.892931,27715.0,21585.835927
0,72472909171,Nullam Consulting,"digital goods: books, movies, music",29.497616,268.704917,22272.0,17975.611385
479,67978471888,Magna Malesuada Corp.,artist supply and craft shops,32.241174,309.621907,22088.0,17484.123476
478,40515428545,Elit Sed Consequat Associates,artist supply and craft shops,32.392481,267.521394,22554.0,17418.574777
303,49891706470,Non Vestibulum Industries,tent and awning shops,27.981022,267.805367,20434.0,17030.806241
192,98166254020,Magna Sed Industries,art dealers and galleries,34.635465,287.959169,21722.0,16457.162287
670,27326652377,Tellus Aenean Corporation,"music shops - musical instruments, pianos, and...",34.661571,412.979388,20186.0,16427.226243


In [74]:
Luxury_df = get_segment(merchant_detail, Luxury).sort_values('estimated profit', ascending = False)
Luxury_df.head(10)

Unnamed: 0,merchant_abn,merchant_name,type,fraud_probability,monthly_profit_growth,monthly_profit,estimated profit
174,96680767841,Ornare Limited,motor vehicle supplies and new parts,29.555245,423.93921,28499.0,23659.766088
0,31334588839,Lacus Aliquam Corporation,"antique shops - sales, repairs, and restoratio...",32.361119,336.744211,20246.0,16427.40811
98,19492220327,Commodo Ipsum Industries,"jewelry, watch, clock, and silverware shops",31.958307,332.707832,19966.0,16301.764998
176,31385641294,Semper Auctor PC,motor vehicle supplies and new parts,34.04105,316.752758,18506.0,14713.484802
175,13514558491,Magna Praesent PC,motor vehicle supplies and new parts,33.752531,221.001096,18891.0,14271.70096
177,12771097467,At Pretium Corp.,motor vehicle supplies and new parts,38.887615,343.922864,16920.0,12862.369204
1,68559320474,Aliquam Auctor Associates,"antique shops - sales, repairs, and restoratio...",30.823426,213.90466,15799.0,12704.869919
178,22033359776,Suspendisse Non Leo PC,motor vehicle supplies and new parts,30.648731,155.103709,13490.0,10646.282816
322,87084550311,Vulputate Inc.,bicycle shops - sales and service,40.732058,248.000508,13822.0,9955.832465
180,21359184622,Sit Amet PC,motor vehicle supplies and new parts,32.720464,169.52465,11492.0,9100.429044


In [75]:
Groceries_df = get_segment(merchant_detail, Groceries).sort_values('estimated profit', ascending = False)
Groceries_df.head(10)

Unnamed: 0,merchant_abn,merchant_name,type,fraud_probability,monthly_profit_growth,monthly_profit,estimated profit
342,32361057556,Orci In Consequat Corporation,"gift, card, novelty, and souvenir shops",28.107352,373.577904,30626.0,25240.743106
343,45629217853,Lacus Consulting,"gift, card, novelty, and souvenir shops",27.141809,378.858105,28839.0,24323.923751
344,94493496784,Dictum Phasellus In Institute,"gift, card, novelty, and souvenir shops",30.579032,323.889667,25281.0,20248.48298
345,79417999332,Phasellus At Company,"gift, card, novelty, and souvenir shops",28.0488,296.003626,22227.0,18548.331067
0,43186523025,Lorem Ipsum Sodales Industries,"florists supplies, nursery stock, and flowers",26.946525,255.772618,19843.0,16738.210421
346,60956456424,Ultricies Dignissim LLP,"gift, card, novelty, and souvenir shops",28.944705,245.817366,18505.0,15244.777396
1,49212265466,Auctor Company,"florists supplies, nursery stock, and flowers",30.429939,252.798718,17985.0,14622.642149
523,57757792876,Pretium Et LLC,"stationery, office supplies and printing and w...",34.035424,208.188945,17231.0,13014.327482
2,21772962346,Purus Gravida Sagittis Ltd,"florists supplies, nursery stock, and flowers",33.248684,185.224021,16836.0,12721.925216
179,35223308778,Euismod In Corp.,"books, periodicals, and newspapers",30.680506,220.442775,15086.0,12291.256724


In [76]:
Service_df = get_segment(merchant_detail, Service).sort_values('estimated profit', ascending = False)
Service_df.head(10)

Unnamed: 0,merchant_abn,merchant_name,type,fraud_probability,monthly_profit_growth,monthly_profit,estimated profit
174,86578477987,Leo In Consulting,"watch, clock, and jewelry repair shops",26.106311,386.294528,30161.0,25712.443039
0,21439773999,Mauris Non Institute,"cable, satellite, and other pay television and...",28.504479,354.912437,28256.0,23246.732353
175,49322182190,Gravida Mauris Incorporated,"watch, clock, and jewelry repair shops",30.046734,323.078932,24522.0,19865.991078
343,35909341340,Arcu Sed Eu Incorporated,"computer programming , data processing, and in...",29.133053,281.584173,22446.0,18301.396175
344,58454491168,Diam At Foundation,"computer programming , data processing, and in...",32.082249,235.801449,19816.0,15380.393944
345,67400260923,Eleifend PC,"computer programming , data processing, and in...",32.554349,196.838735,16533.0,12743.899556
534,42355028515,Eu Inc.,"lawn and garden supply outlets, including nurs...",32.824863,166.75185,16169.0,12205.737335
346,77590625261,Sed Diam Foundation,"computer programming , data processing, and in...",31.458635,135.225335,12074.0,9387.907927
535,98518649381,Nullam Scelerisque Neque Corp.,"lawn and garden supply outlets, including nurs...",38.665522,135.756702,11602.0,8115.214158
176,62224020443,Hendrerit A Corporation,"watch, clock, and jewelry repair shops",36.660908,128.161323,11110.0,8011.087788
