In [1]:
import urllib.parse
import requests
import pandas as pd
import numpy as np

In [30]:
def transform_data(df_com, df_walla):
    same_columns_wallapop = ['id', 'title', 'images', 'price', 'brand', 'model', 'year', 'km', 'engine', 'horsepower']
    same_columns_coches_com = ['id','image', 'price', 'url' , 'make', 'model' , 'fuel', 'cv', 'km', 'year']
    comon_names = ['id','image','price','make','model','year','km','fuelType','horsepower','cv']
    rename_columns = {
        'engine': 'fuelType',
        'fuel' : 'fuelType',
        'horsepower': 'cv',
        'brand' : 'make',
        'images': 'image',
        'url': 'link'
    }
    delete_columns = ['title', 'url']
    df_walla['images'] = df_walla['images'].apply(lambda x: x[0]['original'] if isinstance(x, list) and len(x) > 0 else None)
    df_walla.rename(columns=rename_columns, inplace=True)

    df_walla.drop(columns=delete_columns, inplace=True, errors='ignore')

    df_com.rename(columns=rename_columns, inplace=True)

    df_com.drop(columns=delete_columns, inplace=True, errors='ignore')

    df_walla = df_walla[[col for col in comon_names if col in df_walla.columns]]
    df_com = df_com[[col for col in comon_names if col in df_com.columns]]
    df_walla['site'] = 'walla'
    df_com['site'] = 'com'
    print(len(df_walla)), print(len(df_com))
    df = pd.concat([df_com, df_walla], ignore_index=True)
    df['price'] = df['price'].fillna(0).replace({'\€': '', '\.': '', ',': ''}, regex=True).astype(int)
    df['km'] = df['km'].fillna(0).replace({'\€': '', '\.': '', ',': ''}, regex=True).astype(int)
    df['year'] = df['year'].fillna(0).astype(int)
    return df

async def get_data_coches_com(make: str, model: str, yearMin: int, yearMax: int, kmMin: int, kmMax: int, priceMin: int, priceMax: int):
    parametros = {
        'tipo_busqueda': '2',
        'seminuevo': '0',
        'ord[]': 'marca_up',
        'searched3': '',
        'color': '',
        'combustible_id': '',
        'precio_desde': priceMin,
        'precio_hasta': priceMax,
        'scf_fee_desde': '',
        'scf_fee_hasta': '',
        'potencia_desde': '',
        'potencia_hasta': '',
        'km_desde': kmMin,
        'km_hasta': kmMax,
        'anyo_desde': yearMin,
        'anyo_hasta': yearMax,
        'cambio': '',
        'puertas': '',
        'plazas': '',
        'vendedor': '',
        'make_list[]': '',
        'transmission_name': '',
        'agent_type_name': '',
        'has_financing': '',
        'reservable': '',
        'search3': model,
    }
    url_base = 'https://www.coches.com/api/vo/pills/?'
    url_completa = url_base + urllib.parse.urlencode(parametros)
    response = requests.get(url_completa)
    df_com = pd.DataFrame()
    if response.ok:
        try:
            data = response.json()
            df_com = pd.DataFrame(data['pills'])
        except Exception as e:
            print("Error", e)
    return df_com


async def get_data_wallapop(make: str, model: str, yearMin: int, yearMax: int, kmMin: int, kmMax: int, priceMin: int, priceMax: int):
    parametros = {
    'filters_source': 'suggester',
    'keywords': make + ' ' + model,
    'category_ids': 100,
    'longitude': -3.69196,
    'latitude': 40.41956,
    'yearMin': yearMin,
    'yearMax': yearMax,
    'kmMin': kmMin,
    'kmMax': kmMax,
    'priceMin': priceMin,
    'priceMax': priceMax
}
    url_base = 'https://api.wallapop.com/api/v3/cars/search?'
    url_completa = url_base + urllib.parse.urlencode(parametros)
    response = requests.get(url_completa)
    df_walla = pd.DataFrame()
    if response.ok:
        try:
            data = response.json()
            rows = []
            for obj in data['search_objects']:
                row = obj['content']
                rows.append(row)
            df_walla = pd.DataFrame(rows)
        except Exception as e:
            print("Error:", e)
    return df_walla

async def search_car(make: str, model: str, yearMin: int, yearMax: int, kmMin: int, kmMax: int, priceMin: int, priceMax: int):
    df = await get_data_coches_com(make, model, yearMin, yearMax, kmMin, kmMax, priceMin, priceMax)
    df3 = await get_data_wallapop(make, model, yearMin, yearMax, kmMin, kmMax, priceMin, priceMax)
    df = transform_data(df , df3)
    if len(df) == 0:
        print('No data found')
        return
    return df




In [31]:
make = 'audi'
model = 'a3'
yearMin = 2010
yearMax = 2021
kmMin = 0
kmMax = 100000
priceMin = 0
priceMax = 100000

df = pd.DataFrame(await search_car(make, model, yearMin, yearMax, kmMin, kmMax, priceMin, priceMax))

0     5500.0
1    20445.0
2    24272.0
3    19500.0
4     8999.0
Name: price, dtype: float64 walla
40
20
              id                                              image  price  \
0        7856169  https://images.coches.com/_vo_/fotos/usados/20...  18995   
1        8197776  https://images.coches.com/_vo_/fotos/usados/20...  21718   
2        8125894  https://images.coches.com/_vo_/fotos/usados/20...  26900   
3        7886428  https://images.coches.com/_vo_/fotos/usados/20...  31900   
4        8117062  https://images.coches.com/_vo_/fotos/usados/20...  26500   
5        8197745  https://images.coches.com/_vo_/fotos/usados/20...  21718   
6        8196427  https://images.coches.com/_vo_/fotos/usados/20...  21718   
7        8191746  https://images.coches.com/_vo_/fotos/usados/20...  17999   
8        7870263  https://images.coches.com/_vo_/fotos/usados/20...  22900   
9        8209162  https://images.coches.com/_vo_/fotos/usados/20...  22900   
10       8197775  https://images.coch

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_walla['site'] = 'walla'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_com['site'] = 'com'
