# (Open) Shopee Code League - Logistics

We use `business_calendar` to calculate working days.
This scores 1.00000 as a late submission.

## 0. Set environment

In [1]:
# install business_calendar

!pip install business_calendar

Collecting business_calendar
  Downloading business_calendar-0.2.1-py2.py3-none-any.whl (12 kB)
Installing collected packages: business-calendar
Successfully installed business-calendar-0.2.1


In [2]:
# import packages

import csv
import os
from datetime import datetime, timezone

import numpy as np
import pandas as pd
from business_calendar import Calendar, MO, TU, WE, TH, FR, SA

## 1. Data Preprocess

In [3]:
# read the March delivery orders and preview the first rows

orders_csv = '/kaggle/input/open-shopee-code-league-logistic/delivery_orders_march.csv'
data = pd.read_csv(orders_csv)
data.head()

Unnamed: 0,orderid,pick,1st_deliver_attempt,2nd_deliver_attempt,buyeraddress,selleraddress
0,2215676524,1583138397,1583385000.0,,"Baging ldl BUENAVISTA,PATAG.CAGAYAN Buagsong,c...",Pantranco vill. 417 Warehouse# katipunan 532 (...
1,2219624609,1583309968,1583463000.0,1583799000.0,coloma's quzom CASANAS Site1 Masiyan 533A Stol...,"BLDG 210A Moras C42B 2B16,168 church) Complex ..."
2,2220979489,1583306434,1583460000.0,,"21-O LumangDaan,Capitangan,Abucay,Bataan .Bign...","#66 150-C, DRIVE, Milagros Joe socorro Metro M..."
3,2221066352,1583419016,1583556000.0,,"616Espiritu MARTINVILLE,MANUYO #5paraiso kengi...","999maII 201,26 Villaruel Barretto gen.t number..."
4,2222478803,1583318305,1583480000.0,,L042 Summerbreezee1 L2(Balanay analyn Lot760 C...,G66MANILA Hiyas Fitness MAYSILO magdiwang Lt.4...


In [4]:
# the last whitespace-separated word of each address is taken as the region;
# seller region and buyer region are then joined into one "origin destination" key

def last_word_lower(address):
    """Return the final whitespace-separated token of `address`, lower-cased."""
    return address.split()[-1].lower()


data['origin'] = data['selleraddress'].map(last_word_lower)
data['destination'] = data['buyeraddress'].map(last_word_lower)

data['from_to'] = data['origin'] + " " + data['destination']
set(data['from_to'].unique())  # check the possible set of origin and destination

{'luzon luzon',
 'manila luzon',
 'manila manila',
 'manila mindanao',
 'manila visayas'}

In [5]:
# SLA limit in days for every "origin destination" key, taken from SLA_matrix

sla = {
    'luzon luzon': 5,
    'manila luzon': 5,
    'manila manila': 3,
    'manila mindanao': 7,
    'manila visayas': 7,
}

# look each route up in the dictionary; an unknown route raises KeyError
data['SLA_time'] = data['from_to'].map(lambda route: sla[route])

# keep only the columns needed to decide whether an order is late
kept_columns = ['orderid', 'pick', '1st_deliver_attempt', '2nd_deliver_attempt', 'SLA_time']
data = data.loc[:, kept_columns]

data.head()

Unnamed: 0,orderid,pick,1st_deliver_attempt,2nd_deliver_attempt,SLA_time
0,2215676524,1583138397,1583385000.0,,3
1,2219624609,1583309968,1583463000.0,1583799000.0,3
2,2220979489,1583306434,1583460000.0,,3
3,2221066352,1583419016,1583556000.0,,3
4,2222478803,1583318305,1583480000.0,,5


## 2. Data Analysis

In [6]:
# working-day calendar: Monday through Saturday are workdays, Sunday is not
# 2020/1/1 and 2020/12/31 are listed as holidays only to avoid a warning

holidays_2020 = [
    datetime(2020, 1, 1),
    datetime(2020, 3, 25),
    datetime(2020, 3, 30),
    datetime(2020, 3, 31),
    datetime(2020, 12, 31),
]
cal = Calendar(workdays=[MO, TU, WE, TH, FR, SA], holidays=holidays_2020)

# to check the time zone used by the kernel:
# timestamp = 1583137548
# datetime.fromtimestamp(timestamp)

time_zone = 8 * 60 * 60  # offset in seconds added to every timestamp (8 hours)

In [7]:
# make the solution!
# columns are 'orderid', 'pick', '1st_deliver_attempt', '2nd_deliver_attempt', 'SLA_time'
#
# An order is written as late (1) when
#   * the date of the 1st attempt is after pick-up date + SLA_time working days, or
#   * there is a 2nd attempt and its date is after 1st attempt + 3 working days.

SECOND_ATTEMPT_LIMIT = 3  # working days allowed between the 1st and the 2nd attempt


def to_shifted_datetime(epoch_seconds):
    """Return the naive datetime of a Unix timestamp shifted by `time_zone` seconds.

    The timestamp is interpreted in UTC explicitly, so the resulting calendar
    date does not depend on the local time zone of the machine running the
    notebook. The tzinfo is dropped so the value stays naive, as before.
    """
    shifted = float(epoch_seconds) + time_zone  # add the time_zone
    return datetime.fromtimestamp(shifted, tz=timezone.utc).replace(tzinfo=None)


data = data.to_numpy()

# the context manager flushes and closes the file even if a row raises
with open(os.path.join('/kaggle/working', 'output.csv'), 'w') as f:
    f.write('orderid,is_late\n')

    for row in data:
        start_time = to_shifted_datetime(row[1])
        first_time = to_shifted_datetime(row[2])
        # only calendar dates are compared, the time of day is ignored
        is_delay = first_time.date() > cal.addbusdays(start_time, int(row[4])).date()  # check if late

        if not is_delay and not pd.isna(row[3]):  # the case of 2nd delivery attempt
            second_time = to_shifted_datetime(row[3])
            is_delay = second_time.date() > cal.addbusdays(first_time, SECOND_ATTEMPT_LIMIT).date()
        f.write(f'{int(float(row[0]))},{int(is_delay)}\n')

Feel free to leave a message ~T&T~