In [1]:
import csv
import json
import os
import time

import numpy as np
import pandas as pd
import requests

### API Delivery


In [2]:
# Endpoint that process_batches POSTs each batch of delivery IDs to.
url = "https://gappapi.deliverynow.vn/api/delivery/get_browsing_infos"

In [3]:
# HTTP headers sent with every batch request (handed to requests.post by
# process_batches).
# NOTE(review): 'e300897e' and 'x-sap-ri' look like values copied from a
# browser session; they are presumably short-lived and may need refreshing
# when the API starts answering 403 — TODO confirm. Consider loading them from
# configuration instead of hard-coding them in the notebook.
headers = {
    'accept': 'application/json, text/plain, */*',
    'accept-language': 'en-US,en;q=0.9,vi;q=0.8',
    'content-type': 'application/json;charset=UTF-8',
    'e300897e': 'X;-C<+uaM^^Gg/QmIe4/\\4.G>i0UcRA-t>#*EBM4%Mb.0#C`D&/n5:0*>((:%<=c-VRO4@N8jJg[U)NbfG\':C."n_f#`S3&(,Ajfd@5:g&XkO-I7os`@P2:M+:9UTLm_2WA1/jeV%-6#:$$8R)2?gY"ub"OF8Ctl18^\'hqr51mM4U,1IL!r,q!O&BFr>o.9?9*"jd#H4DLaWsj>$Qu',
    'origin': 'https://shopeefood.vn',
    'priority': 'u=1, i',
    'referer': 'https://shopeefood.vn/',
    'sec-ch-ua': '"Google Chrome";v="129", "Not=A?Brand";v="8", "Chromium";v="129"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'empty',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'cross-site',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36',
    'x-foody-access-token': '',
    'x-foody-api-version': '1',
    'x-foody-app-type': '1004',
    'x-foody-client-id': '',
    'x-foody-client-language': 'vi',
    'x-foody-client-type': '1',
    'x-foody-client-version': '3.0.0',
    'x-sap-ri': 'b6340e67f44e00273584ff3a63a6f59ee1610c835b52ccd1'
}

In [4]:
def chunk_list(lst, n):
    """Yield consecutive slices of ``lst`` that are at most ``n`` items long.

    Slices are produced lazily and in order; the final slice holds whatever
    remains and may be shorter than ``n``.
    """
    chunk_starts = range(0, len(lst), n)
    yield from (lst[start:start + n] for start in chunk_starts)

#### Hàm xử lý các batch


In [5]:
def process_batches(delivery_ids, url, headers, batch_size=500, delay=2, timeout=30):
    """Fetch delivery infos for ``delivery_ids`` by POSTing them in batches.

    Args:
        delivery_ids: Sequence of delivery IDs (must support ``len`` and slicing).
        url: Endpoint each batch is POSTed to.
        headers: HTTP headers sent with every request.
        batch_size: Number of IDs sent per request (default 500).
        delay: Seconds to pause after every request (default 2).
        timeout: Seconds to wait for the server before the request is
            abandoned and the batch is recorded as failed. Pass ``None`` to
            wait indefinitely.

    Returns:
        Tuple ``(all_delivery_infos, batch_fail)``: the concatenated
        ``reply['delivery_infos']`` entries of every successful batch, and the
        list of chunks (lists of IDs) whose request did not succeed.
    """
    all_delivery_infos = []
    batch_fail = []

    # Walk through the IDs one batch at a time and send one request per batch.
    for batch_number, chunk in enumerate(chunk_list(delivery_ids, batch_size), start=1):
        data = {"delivery_ids": chunk}
        print(
            f"Processing batch {batch_number}, with delivery IDs from {chunk[0]} to {chunk[-1]}...")

        try:
            # Without a timeout a single stalled connection would block the
            # whole run forever.
            response = requests.post(url, headers=headers, json=data, timeout=timeout)

            if response.status_code == 200:
                reply = response.json().get('reply')

                if isinstance(reply, dict):
                    # `or []` also covers a key that is present but null.
                    delivery_infos = reply.get('delivery_infos') or []
                    all_delivery_infos.extend(delivery_infos)
                    print(
                        f"Batch {batch_number} succeeded: Collected {len(delivery_infos)} results, total results so far: {len(all_delivery_infos)}")
                else:
                    # An empty/missing reply on HTTP 200 is treated as a
                    # server-side failure so the batch can be retried.
                    print(f"Batch {batch_number} failed: Server error")
                    batch_fail.append(chunk)
            else:
                print(
                    f"Batch {batch_number} failed with status code: {response.status_code}")
                batch_fail.append(chunk)

        except Exception as e:
            # Deliberately broad: one bad batch (network error, invalid JSON,
            # unexpected payload) must not discard everything collected so far.
            print(f"Batch {batch_number} encountered an error: {e}")
            batch_fail.append(chunk)

        # Pause after every request, including failed ones, so the server is
        # never hit back-to-back.
        time.sleep(delay)

    return all_delivery_infos, batch_fail

#### Hàm chạy lại các batch bị lỗi


In [13]:
def retry_failed_batches(failed_ids, url, headers, max_retries=10):
    """Re-run ``process_batches`` on failed IDs until none fail or retries run out.

    Args:
        failed_ids: Flat list of delivery IDs whose batches failed earlier.
        url: Endpoint passed through to ``process_batches``.
        headers: HTTP headers passed through to ``process_batches``.
        max_retries: Maximum number of retry passes (default 10).

    Returns:
        List of all delivery infos collected across the retry passes. IDs that
        still fail after the last pass are reported on stdout but not returned.
    """
    retry_count = 0
    total_retry_infos = []

    while failed_ids and retry_count < max_retries:
        retry_count += 1
        retry_infos, batch_fail = process_batches(failed_ids, url, headers)
        total_retry_infos.extend(retry_infos)
        # Flatten the chunks that failed again into the ID list for the next pass.
        failed_ids = [id_ for failed_batch in batch_fail for id_ in failed_batch]

        print(f"After retry attempt {retry_count}, collected {len(retry_infos)} new results. Total successful results: {len(total_retry_infos)}")

    # Only claim success when nothing is left over; otherwise say how much is missing.
    if failed_ids:
        print(f"Stopped after {retry_count} retry attempts: {len(failed_ids)} delivery IDs still failed.")
    else:
        print("All batches have been processed successfully.")
    return total_retry_infos

#### Lấy dữ liệu


In [17]:
# Build the list of delivery IDs to query.
# NOTE(review): range(1, max_id) stops at max_id - 1, so ID 400000 itself is
# never requested — confirm whether max_id is meant to be inclusive.
max_id = 400000
delivery_ids = list(range(1, max_id))

# First pass over all batches; chunks that fail are collected in batch_fail.
all_delivery_infos, batch_fail = process_batches(delivery_ids, url, headers)

print(f"Total delivery_infos collected: {len(all_delivery_infos)}")

Processing batch 1, with delivery IDs from 1 to 500...
Batch 1 failed with status code: 403
Processing batch 2, with delivery IDs from 501 to 1000...
Batch 2 failed with status code: 403
Processing batch 3, with delivery IDs from 1001 to 1500...
Batch 3 failed with status code: 403
Processing batch 4, with delivery IDs from 1501 to 2000...
Batch 4 failed with status code: 403
Processing batch 5, with delivery IDs from 2001 to 2500...
Batch 5 succeeded: Collected 86 results, total results so far: 86
Processing batch 6, with delivery IDs from 2501 to 3000...
Batch 6 succeeded: Collected 90 results, total results so far: 176
Processing batch 7, with delivery IDs from 3001 to 3500...
Batch 7 failed with status code: 403
Processing batch 8, with delivery IDs from 3501 to 4000...
Batch 8 succeeded: Collected 87 results, total results so far: 263
Processing batch 9, with delivery IDs from 4001 to 4500...
Batch 9 succeeded: Collected 137 results, total results so far: 400
Processing batch 10, 

#### Chạy lại những batch bị fail


In [18]:
# Flatten the failed chunks from the first pass into one list of IDs, then
# retry them.
failed_ids = [id_ for failed_batch in batch_fail for id_ in failed_batch]
retry_infos = retry_failed_batches(failed_ids, url, headers)

Processing batch 1, with delivery IDs from 1 to 500...
Batch 1 failed with status code: 403
Processing batch 2, with delivery IDs from 501 to 1000...
Batch 2 failed with status code: 403
Processing batch 3, with delivery IDs from 1001 to 1500...
Batch 3 failed with status code: 403
Processing batch 4, with delivery IDs from 1501 to 2000...
Batch 4 failed with status code: 403
Processing batch 5, with delivery IDs from 3001 to 3500...
Batch 5 succeeded: Collected 106 results, total results so far: 106
Processing batch 6, with delivery IDs from 4501 to 5000...
Batch 6 succeeded: Collected 163 results, total results so far: 269
Processing batch 7, with delivery IDs from 5001 to 5500...
Batch 7 succeeded: Collected 116 results, total results so far: 385
Processing batch 8, with delivery IDs from 6501 to 7000...
Batch 8 failed with status code: 403
Processing batch 9, with delivery IDs from 7001 to 7500...
Batch 9 failed with status code: 403
Processing batch 10, with delivery IDs from 7501

In [19]:
# Combine first-pass and retry results into a new list; all_delivery_infos is
# left untouched, so re-running this cell does not append retry_infos twice.
total_delivery_infos = all_delivery_infos + retry_infos
print(f"Total delivery_infos collected: {len(total_delivery_infos)}")

Total delivery_infos collected: 240255


#### Ghi dữ liệu vô file csv


In [20]:
# Create the output directory up front so the export does not fail with
# FileNotFoundError on a fresh checkout, after the scrape has already run.
os.makedirs('data', exist_ok=True)

with open('data/delivery_info.csv', mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    # Header row; every data row below follows the same column order.
    writer.writerow([
        'id', 'total_review', 'avg', 'display_total_review',
        'city_id', 'restaurant_id', 'name',
        'categories', 'cuisines', 'address',
        'min_order_value', 'price_range', 'is_quality_merchant',
        'latitude', 'longitude', 'brand_id', 'brand_name',
        'open_time', 'close_time', 'url'
    ])

    for info in total_delivery_infos:
        # Only entries with foody_service_id == 1 are exported.
        if info.get('foody_service_id') != 1:
            continue

        # `or {}` / `or []` also covers keys that are present but null, which
        # a plain .get(key, default) would pass through as None and crash on.
        rating = info.get('rating') or {}
        position = info.get('position') or {}
        brand = info.get('brand') or {}
        operating = info.get('operating') or {}

        writer.writerow([
            info.get('id'),
            rating.get('total_review', 0),
            rating.get('avg', 0),
            rating.get('display_total_review', ''),
            info.get('city_id', ''),
            info.get('restaurant_id', ''),
            info.get('name', ''),
            ', '.join(info.get('categories') or []),
            ', '.join(info.get('cuisines') or []),
            info.get('address', ''),
            # NOTE(review): if these two values are dicts, csv.writer stores
            # their str() form (Python repr, not JSON) — confirm that is the
            # intended on-disk format.
            info.get('min_order_value', {}),
            info.get('price_range', {}),
            info.get('is_quality_merchant', False),
            position.get('latitude', ''),
            position.get('longitude', ''),
            info.get('brand_id', ''),
            brand.get('name', ''),
            operating.get('open_time', ''),
            operating.get('close_time', ''),
            info.get('url', '')
        ])

In [None]:
# Load the exported CSV back to inspect the first rows.
# NOTE(review): the preview saved below lists columns (e.g. 'total_order',
# 'district_id', 'is_open') that the export cell above does not write, so it
# appears to come from an earlier version of the file — re-run to refresh.
delivery_df = pd.read_csv('data/delivery_info.csv')
delivery_df.head()

Unnamed: 0,id,total_order,total_review,avg,display_total_review,city_id,district_id,restaurant_id,restaurant_url,name,...,cuisines,location_url,address,min_order_value,price_range,is_quality_merchant,is_open,latitude,longitude,url
0,7,0,1000,4.8,999+,217,5,32742,banh-trang-tron-chu-vien,Bánh Tráng Trộn Chú Viên,...,Món Việt,ho-chi-minh,"38 Nguyễn Thượng Hiền, P. 5, Quận 3, TP. HCM","{'resource_name': 'delivery_order_minimum', 'r...","{'resource_name': 'avg_price_format', 'resourc...",False,True,10.771209,106.685979,https://shopeefood.vn/ho-chi-minh/banh-trang-t...
1,8,0,500,4.6,500+,217,1,43725,xoi-mit-si-gon-giao-hang-tan-noi,Xôi Mít Sì Gòn - Giao Hàng Tận Nơi,...,Món Việt,ho-chi-minh,"187/5/6 Cô Giang, P. Cô Giang, Quận 1, TP. HCM","{'resource_name': 'delivery_order_minimum', 'r...","{'resource_name': 'avg_price_format', 'resourc...",False,True,10.761436,106.693697,https://shopeefood.vn/ho-chi-minh/xoi-mit-si-g...
2,9,0,1000,4.9,999+,217,5,69760,bun-moc-rom-map-vo-van-tan,Bún Mọc Ròm Mập - Võ Văn Tần,...,Món Việt,ho-chi-minh,"457 Võ Văn Tần, P. 5, Quận 3, TP. HCM","{'resource_name': 'delivery_order_minimum', 'r...","{'resource_name': 'avg_price_format', 'resourc...",True,True,10.769463,106.683691,https://shopeefood.vn/ho-chi-minh/bun-moc-rom-...
3,10,0,1000,4.8,999+,217,5,3041,tan-tong-loi-mi-sui-cao,Tân Tòng Lợi - Mì Sủi Cảo & Vịt Tiềm,...,"Món Trung Hoa, Món Á",ho-chi-minh,"311 Võ Văn Tần, P. 5, Quận 3, TP. HCM","{'resource_name': 'delivery_order_minimum', 'r...","{'resource_name': 'avg_price_format', 'resourc...",False,True,10.771538,106.685744,https://shopeefood.vn/ho-chi-minh/tan-tong-loi...
4,11,0,1000,4.8,999+,217,5,20873,quan-thao-banh-trang-tron,Bánh Tráng Trộn Cô Thảo,...,Món Việt,ho-chi-minh,"90 Nguyễn Thượng Hiền, P. 5, Quận 3, TP. HCM","{'resource_name': 'delivery_order_minimum', 'r...","{'resource_name': 'avg_price_format', 'resourc...",True,True,10.772374,106.685164,https://shopeefood.vn/ho-chi-minh/quan-thao-ba...
