In [None]:
import requests
import pandas as pd
from opnieuw import retry
from requests.exceptions import HTTPError, ProxyError
from tinydb import TinyDB, Query
import concurrent.futures

### Functions

In [None]:
# Get Wallapop Used Cars
@retry(
    # `ConnectionError` here is the Python builtin. requests raises its own
    # `requests.exceptions.ConnectionError` / `Timeout`, which do not derive
    # from the builtin, so they are listed explicitly to actually be retried.
    retry_on_exceptions=(
        ConnectionError,
        requests.exceptions.ConnectionError,
        requests.exceptions.Timeout,
        HTTPError,
        ProxyError,
    ),
    max_calls_total=4,
    retry_window_after_first_call_in_seconds=60,
)
def get_listings(step=0, offset=0, start=0, category_ids=100, latitude='40.428207', longitude='-3.679739', search_id='095355bf-701b-40b6-b37a-1740ef9aca5a'):
    """Request one page of results from the Wallapop cars search endpoint.

    Args:
        step: Sent as the `step` query parameter.
        offset: Sent as the `offset` query parameter (callers advance it per page).
        start: Sent as the `start` query parameter (callers advance it per page).
        category_ids: Sent as the `category_ids` query parameter.
        latitude: Latitude of the search centre.
        longitude: Longitude of the search centre.
        search_id: Sent as the `search_id` query parameter.

    Returns:
        The decoded JSON body when the server answers 200. For any other
        status code the code is printed and None is returned, so callers
        must handle a None result.

    Raises:
        The last connection/timeout/proxy error once the retry budget of the
        decorator is exhausted.
    """
    url = "https://api.wallapop.com/api/v3/cars/search"
    # Passed through `params` so requests builds and escapes the query string;
    # the keys are kept in the same order the hand-built URL used.
    params = {
        'offset': offset,
        'experiment': 'not_logged_experiment',
        'filters_source': 'default_filters',
        'latitude': latitude,
        'start': start,
        'time_filter': 'lastMonth',
        'order_by': 'most_relevance',
        'step': step,
        'category_ids': category_ids,
        'longitude': longitude,
        'search_id': search_id,
    }
    headers = {
        'Accept': 'application/json, text/plain, */*',
        'Accept-Language': 'es,en-GB;q=0.9,en-US;q=0.8,en;q=0.7',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        'DeviceOS': '0',
        'MPID': '8966409109343412630',
        'Origin': 'https://es.wallapop.com',
        'Pragma': 'no-cache',
        'Referer': 'https://es.wallapop.com/',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-site',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36',
        'X-AppVersion': '82350',
        'X-DeviceID': 'fa090a97-4fb2-4423-ae97-bdd5688c75b6',
        'X-DeviceOS': '0',
        'sec-ch-ua': '"Not/A)Brand";v="8", "Chromium";v="126", "Google Chrome";v="126"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"macOS"'
    }
    # Without a timeout a stalled connection would block its worker thread
    # indefinitely and the retry decorator would never get a chance to run.
    response = requests.get(url, params=params, headers=headers, timeout=30)
    if response.status_code == 200:
        return response.json()
    print(f'Status Code {response.status_code}')
    return None


In [None]:
# Get Wallapop Listing details (HTML)
@retry(
    # `ConnectionError` here is the Python builtin. requests raises its own
    # `requests.exceptions.ConnectionError` / `Timeout`, which do not derive
    # from the builtin, so they are listed explicitly to actually be retried.
    retry_on_exceptions=(
        ConnectionError,
        requests.exceptions.ConnectionError,
        requests.exceptions.Timeout,
        HTTPError,
        ProxyError,
    ),
    max_calls_total=4,
    retry_window_after_first_call_in_seconds=60,
)
def get_listing_details(listing_slug):
    """Download the public item page for a listing.

    Args:
        listing_slug: Path segment appended to `https://es.wallapop.com/item/`.

    Returns:
        The raw response body (bytes) when the server answers 200. For any
        other status code the code is printed and None is returned.

    Raises:
        The last connection/timeout/proxy error once the retry budget of the
        decorator is exhausted.
    """
    url = f"https://es.wallapop.com/item/{listing_slug}"
    # Without a timeout a stalled connection would block the caller forever.
    response = requests.get(url, timeout=30)
    if response.status_code == 200:
        return response.content
    # Report the failure the same way get_listings does instead of silently
    # returning None.
    print(f'Status Code {response.status_code}')
    return None

In [None]:
def _flatten_listing(listing):
    """Flatten one `search_objects` entry into the flat record that is stored.

    Mandatory fields are read with `[]` and raise KeyError (or TypeError if a
    nested value is not subscriptable) when absent; optional fields fall back
    to None.
    """
    content = listing['content']
    user = content['user']
    flags = content['flags']
    visibility_flags = content['visibility_flags']

    record = {
        'id': listing['id'],
        'type': listing['type'],
        'content_id': content['id'],
        'title': content['title'],
        'storytelling': content['storytelling'],
        'distance': content['distance'],
        'user_id': user['id'],
        'kind': user['kind'],
    }
    for name in ('pending', 'sold', 'reserved', 'banned', 'expired', 'onhold'):
        record[f'flag_{name}'] = flags[name]
    for name in ('bumped', 'highlighted', 'urgent', 'country_bumped', 'boosted'):
        record[f'flag_{name}'] = visibility_flags[name]
    for name in (
        'price', 'currency', 'web_slug', 'category_id', 'brand', 'model',
        'year', 'version', 'km', 'engine', 'gearbox', 'horsepower',
        'favorited', 'creation_date', 'modification_date', 'location',
        'supports_shipping',
    ):
        record[name] = content.get(name, None)
    return record


def iterate_listings(coordinates, db_dir='D:/Proyecto ironhack/files/datos_coordenadas'):
    """Page through the car search for one coordinate and store every listing.

    Results are upserted into the "listings" table of a TinyDB JSON file named
    after the coordinate. Paging stops at the first page that has no
    `search_objects`, or when `get_listings` returns None (non-200 response).

    Args:
        coordinates: Mapping with "latitude" and "longitude" keys.
        db_dir: Directory that receives the per-coordinate JSON file. Defaults
            to the location the notebook has always written to.

    Returns:
        A human-readable completion message for the coordinate.
    """
    page_size = 40  # offset/start are advanced by this amount per request
    db_path = f'{db_dir}/listings -{coordinates["latitude"]} - {coordinates["longitude"]}.json'

    # The context manager closes the storage file even if a page fails, which
    # matters because many of these run concurrently.
    with TinyDB(db_path) as db:
        table = db.table("listings")
        Serie = Query()
        offset = 0
        page = 0
        while True:
            page += 1
            print(f'Iteracion en pagina {page} - Offset {offset}')
            response = get_listings(step=0, offset=offset, start=offset, category_ids=100, latitude=coordinates['latitude'], longitude=coordinates['longitude'], search_id='095355bf-701b-40b6-b37a-1740ef9aca5a')
            offset += page_size

            # get_listings returns None on a non-200 answer; treat that like
            # an empty page instead of crashing on `None.get`.
            listings = response.get('search_objects', []) if response else None
            if not listings:
                break

            for listing in listings:
                if not listing.get('content', None):
                    continue
                try:
                    car_listing = _flatten_listing(listing)
                except (KeyError, TypeError, AttributeError) as exc:
                    # Still best-effort (one malformed entry must not abort the
                    # crawl), but the skip is now visible rather than silent.
                    print(f'Skipping malformed listing {listing.get("id")}: {exc!r}')
                    continue
                # Match on the same field the value comes from so an existing
                # record is updated rather than duplicated.
                table.upsert(car_listing, Serie.content_id == car_listing['content_id'])
    return f'Ended loop for lat: {coordinates["latitude"]}, long: {coordinates["longitude"]}'

### Main Code

In [None]:
# Load the coordinate mesh and turn every CSV row into one dict; the worker
# function reads the "latitude" and "longitude" keys of each of these dicts.
coordenadas = pd.read_csv('D:/Proyecto ironhack/files/spain_coordenates_mesh.csv')
coordinates_list = coordenadas.to_dict(orient='records')

In [None]:
# Crawl every coordinate concurrently, one task per row of the mesh.
with concurrent.futures.ThreadPoolExecutor() as executor:
    future_to_row = {executor.submit(iterate_listings, coordinates): coordinates for coordinates in coordinates_list}
    for future in concurrent.futures.as_completed(future_to_row):
        row = future_to_row[future]
        try:
            print(future.result())
        except Exception as exc:
            # future.result() re-raises whatever the worker raised; report it
            # with its coordinate and keep draining, so one failed coordinate
            # does not hide the outcome of all the others.
            print(f'Failed for {row}: {exc!r}')