In [1]:
import pandas as pd

In [2]:
# Raw extracted records; later cells read the 'fy', 'month', 'hscode',
# 'quantity', 'value' and 'revenue' columns from this frame.
df = pd.read_csv(filepath_or_buffer='extracted.csv')

In [3]:
# Substitution table consulted by hscode_lookup via
# ``exceptions.get(code, code)``: each key is an HS code as it may appear in
# the input, each value is the code that is looked up in the tariff table
# in its place. Codes without an entry are looked up unchanged.
exceptions = {
    '8703.80.39': '8703.80.29',
    '8711.30.19': '8711.30.90',
    '8711.90.90': '8711.90',
    '8703.80.49': '8703.80.50',
    '8704.22.96': '8704.22.99',
    '8711.30.11': '8711.30.10',
    '8704.90.40': '8704.60',
    '8711.30.91': '8711.30.10',
    '8711.90.10': '8711.60',
    '8711.30.99': '8711.30.90',
    '8704.22.94': '8704.22.99',
    '8702.10.90': '8702.10.49',
    '8703.90.11': '8703.80.11',
}

In [4]:
def str_hscode(hscode: int):
    """Format a numeric HS code as a dotted string.

    An 8-character code becomes ``'XXXX.XX.XX'``, a 6-character code becomes
    ``'XXXX.XX'``; anything else is passed through as ``str(hscode)``.
    Trailing ``'.00'`` groups are then removed, so ``87038000`` yields
    ``'8703.80'`` and ``87030000`` yields ``'8703'``.

    Args:
        hscode: The code as an int. Integral floats (e.g. ``87038039.0``,
            which pandas produces when a row or column is upcast to float)
            and already-formatted strings are accepted as well.

    Returns:
        The dotted code as a ``str``.
    """
    # pandas may hand us 87038039.0 instead of 87038039; without this the
    # text would be '87038039.0' and fall through unformatted.
    if isinstance(hscode, float) and hscode.is_integer():
        hscode = int(hscode)

    digits = str(hscode)
    if len(digits) == 8:
        dotted = f'{digits[:4]}.{digits[4:6]}.{digits[6:]}'
    elif len(digits) == 6:
        dotted = f'{digits[:4]}.{digits[4:6]}'
    else:
        dotted = digits

    # Drop empty trailing groups: '8703.00.00' -> '8703'.
    while dotted.endswith('.00'):
        dotted = dotted[:-3]
    return dotted

In [5]:
def hscode_lookup(hscode: int, lookup=None):
    """Collect descriptions and duty rates for an HS code and its parents.

    The code is formatted with ``str_hscode`` and then walked from the most
    specific level up to the 4-digit heading (``'8703.80.39'`` ->
    ``'8703.80'`` -> ``'8703'``). At every level the code is first passed
    through ``exceptions`` and then looked up in the tariff table.

    Args:
        hscode: The HS code to look up.
        lookup: Optional tariff table indexed by sub-heading and providing
            the columns ``'Description of goods'``, ``'Import Duty SAARC'``
            and ``'Import Duty General'``. When omitted, the table is read
            from ``'hscode/data/fy-2079-80.csv'``; pass a preloaded frame to
            avoid re-reading the file on every call.

    Returns:
        A dict with:
          * ``'descriptions'``: descriptions from most to least specific;
          * ``'hscode'``: the formatted code that was requested;
          * ``'duties'``: (only if at least one level was visited) the
            ``'import_duty_saarc'`` and ``'import_duty_general'`` values of
            the most specific level.

    Raises:
        KeyError: If a code visited during the walk is missing from the
            table index.
    """
    if lookup is None:
        lookup = pd.read_csv('hscode/data/fy-2079-80.csv', index_col='Sub-heading')

    code = str_hscode(hscode)
    # Remember what was asked for: `code` is consumed down to '' by the walk
    # below, so it cannot be reported back afterwards.
    requested = code

    descriptions = []
    duties = {}
    while code:
        # A parent level may itself be an empty group ('8703.00' -> '8703').
        while code.endswith('.00'):
            code = code[:-3]
        code = exceptions.get(code, code)

        details = lookup.loc[code]
        if not duties:
            # Duties are taken from the first (most specific) level only.
            duties['import_duty_saarc'] = details['Import Duty SAARC']
            duties['import_duty_general'] = details['Import Duty General']
        descriptions.append(details['Description of goods'])

        # For codes below the heading that do not end in '0', also record
        # the entry whose last digit is replaced by '0'
        # (e.g. '8703.80.39' -> '8703.80.30').
        if not code.endswith('0') and len(code) > 4:
            details = lookup.loc[code[:-1] + '0']
            descriptions.append(details['Description of goods'])

        # Step up one level; yields '' once the heading has been processed.
        code = code.rpartition('.')[0]

    res = {'descriptions': descriptions, 'hscode': requested}
    if duties:
        res['duties'] = duties
    return res


In [6]:
# HS-code prefix tables. Each inner key is the label returned by
# get_fuel_type / get_vehicle_type, each value the list of dotted-code
# prefixes that select that label.
#
# Order matters for any first-match scan of this table: a group with a more
# specific prefix must be listed before a group whose prefix also covers it.
matchings = {
    'fuel': {
        'bev': ['8701.24', '8702.40', '8703.80', '8704.60', '8704.90.20', '8704.90.30', '8711.60'],
        'hev': ['8703.60', '8703.70', '8703.40', '8703.50', '8701.22', '8701.23', '8702.20', '8702.30', '8704.40', '8704.50'],
    },
    'type': {
        'bus': ['8702.10.10', '8702.20.10', '8702.30.10', '8702.40.10', '8702.90.10'],
        'minibus': ['8702.10.20', '8702.20.20', '8702.30.20', '8702.40.20', '8702.90.20'],
        'microbus': ['8702.10.30', '8702.20.30', '8702.30.30', '8702.40.30', '8702.90.30'],
        'jeepvan': ['8702.10.4', '8702.20.4', '8702.30.4', '8702.40.4', '8702.90.4'],
        # The three-wheeler prefixes fall under heading 8703, so they have to
        # come before the catch-all '8703' entry or they can never be hit.
        'threewheeler': ['8703.21.1', '8703.80.1'],
        'fourwheeler': ['8703'],
        'threewheelergoods': ['8704.21.40', '8704.31.40', '8704.41.40', '8704.51.40', '8704.60.20', '8704.90.20'],
        'twowheeler': ['8711']
    }
}

In [7]:
def get_fuel_type(hscode: str):
    """Return the fuel label whose prefix list matches ``hscode``.

    The groups in ``matchings['fuel']`` are scanned in definition order and
    the first group containing a prefix of ``hscode`` wins. When no group
    matches, ``'defacto'`` is returned.
    """
    return next(
        (
            fuel
            for fuel, prefixes in matchings['fuel'].items()
            if any(hscode.startswith(prefix) for prefix in prefixes)
        ),
        'defacto',
    )

In [8]:
def get_vehicle_type(hscode: str):
    """Return the vehicle label whose prefix best matches ``hscode``.

    Every prefix in ``matchings['type']`` is tested and the label owning the
    longest matching prefix is returned, so a specific entry such as
    ``'8703.21.1'`` takes precedence over a catch-all such as ``'8703'``
    regardless of the order in which the groups are defined. On equal
    prefix length the group defined first wins. When nothing matches,
    ``'unknown'`` is returned.
    """
    best_type = 'unknown'
    best_len = -1
    for vtype, codes in matchings['type'].items():
        for code in codes:
            # Strict '>' keeps the earliest group on ties.
            if len(code) > best_len and hscode.startswith(code):
                best_type, best_len = vtype, len(code)
    return best_type

In [9]:
# Build the output frame in one pass instead of concatenating one-row frames
# inside a loop (which copies the accumulated frame on every iteration).
# Working column-wise also keeps each column's dtype, whereas iterrows()
# returns every row as a single Series and may upcast mixed numeric values.
hscodes = df['hscode'].map(str_hscode)

new_df = pd.DataFrame({
    'fy': df['fy'],
    'month': df['month'],
    'hscode': hscodes,
    # Both classifications are derived from the dotted code string.
    'type': hscodes.map(get_vehicle_type),
    'fuel': hscodes.map(get_fuel_type),
    'quantity': df['quantity'],
    'value': df['value'],
    'revenue': df['revenue'],
}).reset_index(drop=True)  # 0..n-1 index, as the previous ignore_index=True produced

In [10]:
# Persist the classified records; the row index is not written out.
new_df.to_csv(path_or_buf='final.csv', index=False)