# Setup 

In [1]:
import pandas as pd
import numpy as np
import seaborn as sb
import matplotlib.pyplot as plt
from thefuzz import fuzz
from thefuzz import process

# MER

## Load data

In [2]:
MER = pd.read_excel('../data/MER.xlsx', sheet_name=[0, 1])
MERT, MERPPL = MER[0], MER[1]

### Column types

In [3]:
MERT.dtypes
MERPPL.dtypes

MatNo     int64
Points    int64
dtype: object

### Check null (general)

In [4]:
isNullMERT = MERT.isna().any().any()
isNullMERPPL = MERPPL.isna().any().any()
print(f"MERT: {str(isNullMERT)}, MERPPL: {str(isNullMERPPL)}")

MERT: False, MERPPL: False


### Check duplicated (general)

In [5]:
isDuplicatedMERT = MERT.duplicated().any()
isDuplicatedMERPPL = MERPPL.duplicated().any()

print(f"MERT: {str(isDuplicatedMERT)}, MERPPL: {str(isDuplicatedMERT)}")

MERT: False, MERPPL: False


## Investigate columns

### Invoicedate

In [6]:
dataType = str(MERT['Invoicedate'].dtypes)
invoicedateMin = MERT['Invoicedate'].min()
invoicedateMax = MERT['Invoicedate'].max()
print(f"Type: {dataType}, Min: {invoicedateMin}, Max:{invoicedateMax}")

Type: datetime64[ns], Min: 2023-01-03 00:00:00, Max:2023-12-29 00:00:00


The data is for the year 2023

### Distrubutor

In [7]:
MERT['Distributor'].unique()

array(['A', 'B', 'C', 'D'], dtype=object)

### GroupId

In [8]:
MERT['GroupId'].unique()
groupidMin = MERT['GroupId'].min()
groupidMax = MERT['GroupId'].max()
print(f"Min: {str(groupidMin)}, Max: {str(groupidMax)}")

Min: ERW_A1DS2023040002, Max: ERW_DYZP2023100001


### Type

In [9]:
MERT['Type'].unique()

array(['Reseller', 'Retails'], dtype=object)

### Provincename

In [10]:
MERT['Provincename'].unique() # found 'N\\A'
nullProvincename = MERT[MERT['Provincename'] == 'N\\A']['GroupId'].unique().tolist()
MERT[MERT['GroupId'].isin(nullProvincename)] # yep no way to fill this

Unnamed: 0,Invoicedate,Distributor,GroupId,Type,Provincename,MatNo,Material Name,New_Volume(L)
1059,2023-12-15,A,ERW_A7QF2022060003,Retails,N\A,145577,M SUP Frict Fighter TBS 10W-30 4x7L/CAR,672
1771,2023-11-23,A,ERW_A7QF2022060003,Retails,N\A,145472,M DEL MODERN 10W-30 S DEFENS V2 4X7L/CAR,476
2489,2023-11-23,A,ERW_A7QF2022060003,Retails,N\A,145570,M SUP Friction Fighter 10W-40 4x4+1L/CAR,4320
3191,2023-03-17,A,ERW_A6CM2022070002,Retails,N\A,143629,MOBIL SUPER 2000 TBS 10W-30 4X7L/CAR,9492
3192,2023-07-21,A,ERW_A6CM2022070002,Retails,N\A,145577,M SUP Frict Fighter TBS 10W-30 4x7L/CAR,9212
4427,2023-02-14,A,ERW_A7QF2022060003,Retails,N\A,144373,MOBIL DELVAC MX 10W-30 CTN 4X6+1L:TH,6692
4754,2023-12-15,A,ERW_A7QF2022060003,Retails,N\A,145570,M SUP Friction Fighter 10W-40 4x4+1L/CAR,3760
4954,2023-02-13,A,ERW_A7QF2022060003,Retails,N\A,142923,MOBIL SPECIAL 20W-50 CTN 4X4L:TH,6544
5690,2023-05-08,B,ERW_BXEO2021030004,Retails,N\A,143629,"โมบิล SUPER 2000 TBS 10W30 ,4/7",3164
5805,2023-11-03,A,ERW_A7QF2022060002,Retails,N\A,140043,MOBIL ATF 3309 12x1L/CAR,5392


Since A and B both share Bangkok and Bangkok is the highest market share, assume that all of these groupid are from Bangkok

In [11]:
MERT.loc[MERT['Provincename'] == 'N\A', 'Provincename'] = 'Bangkok'
MERT['Provincename'].unique()

array(['Chachoengsao', 'Nakhon Ratchasima', 'Nakhon Pathom', 'Phetchabun',
       'Rayong', 'Khon Kaen', 'Trat', 'Suphan Buri', 'Lop Buri',
       'Bangkok', 'Kanchanaburi', 'Chai Nat', 'Chon Buri', 'Phuket',
       'Udon Thani', 'Saraburi', 'Ratchaburi', 'Nakhon Si Thammarat',
       'Chainat', 'Chanthaburi', 'Chumphon', 'Yasothon', 'Chiang Rai',
       'Phangnga', 'Songkhla', 'Ubon Ratchathani', 'Maha Sarakham', 'Tak',
       'Chiang Mai', 'Samut Sakhon', 'Prachuap Khiri Khan', 'Krabi',
       'Pathum Thani', 'Roi Et', 'Sukhothai', 'Nakhon Sawan',
       'Surat Thani', 'Phra Nakhon Si Ayutthaya', 'Nong Bua Lam Phu',
       'Sing Buri', 'Nakhon Phanom', 'Nakhon Nayok', 'Surin', 'Uttaradit',
       'Lampang', 'Sa Kaeo', 'Lamphun', 'Pattani', 'Trang',
       'Kamphaeng Phet', 'Phitsanulok', 'Uthai Thani', 'Phrae', 'Phichit',
       'Sakon Nakhon', 'Loei', 'Nonthaburi', 'Samut Prakan', 'Nong Khai',
       'Phetchaburi', 'Nan', 'Buri Ram', 'Si Sa Ket', 'Prachin Buri',
       'Kalasin', 'P

### MatNo

In [12]:
MERT['MatNo'].unique()

array([145577, 143160, 145438, 145500, 145498, 145497, 145474, 141539,
       143877, 143629, 145575, 142939, 145570, 143507, 142930, 140918,
       142931, 143628, 135896, 145472, 135899, 145451, 144673, 135508,
       145444, 134728, 145888, 140521, 140043, 145826, 137227, 145841,
       145496, 145828, 140418, 145832, 134707, 143623, 124213, 143586,
       135898, 135895, 137232, 142938, 144373, 143566, 135509, 143898,
       135904, 143132, 145644, 140363, 141559, 145742, 146130, 144463,
       142923, 134346, 137219, 145569, 139840, 135903, 144473, 123150,
       143900, 145439, 143508, 135892, 145450, 143625, 134525, 143133,
       143110, 143899, 135872, 140417, 143874, 144672, 134349, 134345,
       145572, 145473, 143354, 145499, 143162, 134782, 145573, 135162,
       142924, 145447, 143106, 143574, 134533, 142212, 145930, 140709,
       133405, 124216, 142889, 145838, 145840, 140523, 144462, 145837,
       140635, 143576, 145727, 143099, 145449, 142932, 139706, 139759,
      

### Material Name

In [168]:
def extractName(series):    
    # extract viscosity, None
    name = series.name
    #pattern 1 _00w-00_
    viscosity = series.str.extract(r'\b(\d+)W-(\d+)\b') 
    #pattern 2 -00w00-
    viscosity.loc[(viscosity.isna().all(axis=1))] = series[viscosity.isna().all(axis=1)].str.extract(r'(\d{2})W(\d{2})')
    #pattern 3 _00w00_
    viscosity.loc[(viscosity.isna().all(axis=1))] = series[viscosity.isna().all(axis=1)].str.extract(r'\b(\d+)W(\d+)\b')
    #pattern 4 00w-00
    viscosity.loc[(viscosity.isna().all(axis=1))] = series[viscosity.isna().all(axis=1)].str.extract(r'(\d+)W-(\d+)')
    
    # if any pattern is found
    value = viscosity.dropna()
    if value.empty:
        finalVis = np.nan
    else:
        finalVis = (value[0].astype(str) + "w-" + value[1].astype(str)).values[0]


    # special pattern single w: 10w
    if finalVis is np.nan:
        specialVis = series.str.extract(r'\b(\d+)W\b')
        
        specialVisvalue = specialVis.dropna()

        if not specialVisvalue.empty:
            finalVis = str(specialVisvalue.values[0][0]) + "w"

    # isDelvac: True, or False

    #pattern: contain Del (case insensitive)
    containDel = series.str.contains('del', case=False).any()
    if containDel:
        finalDel = True
    else:
        finalDel = False


    # product line: super, one, None
    containSuper = series.str.contains('sup', case=False).any()
    containOne = series.str.contains(r'.*\b1\b.*',regex=True).any()


    if containSuper:
        finalLine = "Super"
    elif containOne:
        finalLine = "One"
    else:
        finalLine = np.nan

    # product group name: just select one
    groupName = series.values[0]
    

    cols = ['Viscosity', 'isDelvac', 'ProductLine', 'GroupName']
    
    return pd.Series((finalVis, finalDel, finalLine, groupName), index=cols)
    




pd.set_option('display.max_colwidth', None)
MaterialTable = MERT.groupby('MatNo')['Material Name'].apply(extractName).unstack(-1)

In [169]:
MaterialTable

Unnamed: 0_level_0,Viscosity,isDelvac,ProductLine,GroupName
MatNo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
123150,,False,,"MOBIL MULTIPURPOSE ATF, 208LT DR"
124213,,False,,MOBIL ATF 220 D 208LT
124216,,False,,MOBIL ATF 220 P1/20LT
133405,20w-50,True,Super,"MOBIL DELVAC SUPER 20W-50, 12X1LT CTN"
134345,10w,False,,"MOBIL HYDRAULIC 10W, 4X5LT CTN"
...,...,...,...,...
145840,0w-40,False,One,MOBIL 1 0W-40 (SP) V2 12X1L/CAR
145841,0w-40,False,One,MOBIL 1 0W-40 (SP) V2 4X4L/CAR
145888,20w-50,False,Super,M SUP EVD Protection 20W-50 4x4+1L/CAR
145930,10w-30,False,Super,"M SUPER FF 10W-30, 4X4LT CTN"


In [170]:
MaterialTable[MaterialTable["ProductLine"].isna()]

Unnamed: 0_level_0,Viscosity,isDelvac,ProductLine,GroupName
MatNo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
123150,,False,,"MOBIL MULTIPURPOSE ATF, 208LT DR"
124213,,False,,MOBIL ATF 220 D 208LT
124216,,False,,MOBIL ATF 220 P1/20LT
134345,10w,False,,"MOBIL HYDRAULIC 10W, 4X5LT CTN"
134348,,False,,MOBIL BRAKE FLUID DOT-4 12X0.5L/CAR
134533,,True,,"MOBIL DELVAC 1330, P18LT"
134782,,True,,"MOBIL DELVAC 1340, 4X6LT CTN"
135162,,False,,"MOBILFLUID 424, 20L PAIL"
135508,,True,,"MOBIL DELVAC 1640, 4X6LT CTN - N"
135895,85w-140,False,,MOBILUBE HD 85W-140 4/5 LT
