In [None]:
import requests
import json
import math
import datetime
from pprint import pprint

from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

In [None]:
def getAdsData(params: str, timeout: float = 30):
    """POST a search request to the DoneDeal search API and return the decoded JSON.

    Args:
        params: Request body, already serialised as a JSON string.
        timeout: Seconds to wait for the server before giving up, so a stalled
            connection cannot block the notebook indefinitely.

    Returns:
        The response body decoded from JSON.

    Raises:
        requests.exceptions.Timeout: If the server does not answer within ``timeout``.
        requests.exceptions.HTTPError: If the server answers with a 4xx/5xx status.
    """
    # Search URL
    url = "https://www.donedeal.ie/search/api/v4/find/"

    # The body is JSON and a JSON reply is requested.
    headers = {
        "Content-Type": "application/json",
        "Accept": "application/json",
    }

    # NOTE(review): verify=False disables TLS certificate verification; the
    # matching warning is silenced at import time. Confirm this is still needed.
    response = requests.post(
        url,
        data=params,
        headers=headers,
        verify=False,
        allow_redirects=False,
        timeout=timeout,
    )
    # Fail with a clear HTTP error instead of an opaque JSON decoding error.
    response.raise_for_status()
    return response.json()


In [None]:
def gen_params_str(start_index: int = 0, num_results: int = 30) -> str:
    """Build the JSON request body for one page of the car search.

    The search is fixed to Audi cars for sale (a set list of models), priced up
    to 100000 and from year 2010 onwards.

    Args:
        start_index: Value sent as ``"start"``.
        num_results: Added to ``start_index`` to form the value sent as ``"max"``.

    Returns:
        A compact JSON string (no whitespace between tokens).
    """
    # NOTE(review): "max" grows with start_index. If the API treats "max" as a
    # page size rather than an end index, later pages will overlap — confirm.
    search_params = {
        "adType": "forsale",
        "max": start_index + num_results,
        "start": start_index,
        "section": "cars",
        "dependant": [
            {
                "parentName": "make",
                "parentValue": "Audi",
                "childName": "model",
                "childValues": [
                    "A3", "A4", "A5", "A6", "A7", "A8",
                    "Q3", "Q4", "Q5", "Q6", "Q7", "Q8",
                    "S4", "S5", "S6", "S7", "S8",
                ],
            }
        ],
        "price_to": "100000",
        "year_from": "2010",
    }

    # A real JSON encoder keeps values containing spaces or quotes intact;
    # compact separators give output with no whitespace between tokens.
    return json.dumps(search_params, separators=(",", ":"))

In [None]:
# Display available ad keys
# for key in ads_data[0].keys():
#     print(f"'{key}':{ads_data[0][key]}")

# def write_available_optionsfile(fout: str= "optional_keys.txt") -> None:
#     keys = {key:"False" for key in ads_data[0].keys()}
#     with open(fout, "w") as f:
#         f.write(json.dumps(keys))
# write_available_optionsfile("optional_keys2.txt")

# def load_optionsfile(fname: str= "optional_keys.txt") -> dict:
#     with open(fname) as f_in:
#         options = json.load(f_in)
#     options = {key:eval(value) for key,value in options.items()} #eval str s to True or False
#     return options

# options = load_optionsfile("optional_keys.txt")


# Field names of an ad record, in the order used for the `options` mapping.
_AD_FIELDS = (
    "id", "userId", "dealerId", "state", "age", "publishDate",
    "header", "currency", "price", "county", "section", "emailResponse",
    "phoneResponse", "wanted", "forSale", "c2b", "mediaCount", "friendlyUrl",
    "displayAttributes", "photos", "oldPriceView", "keyInfo", "imageAlt",
    "financeSummary", "deliveryAvailable", "herdwatchVerified", "bumpable",
    "spotlightable", "spotlight", "userSaved", "adAnalytics", "priceOnRequest",
    "dealer", "meritsV2", "greenlightVerified", "greenlightBadgeUrl",
)

# Fields that are switched on; every other field in _AD_FIELDS is switched off.
_ENABLED_AD_FIELDS = frozenset({
    "id", "state", "age", "publishDate", "header", "currency", "price",
    "county", "mediaCount", "friendlyUrl", "displayAttributes", "dealer",
})

# Maps each field name to True (keep) or False (drop).
options = {name: name in _ENABLED_AD_FIELDS for name in _AD_FIELDS}

# Names of the enabled fields, in `options` order.
selected_keys = [name for name, keep in options.items() if keep]


In [None]:
def simplify_display_attributes(att: list) -> dict:
    """Flatten a list of display-attribute entries into one dict.

    Each entry contributes its "displayName" as the key and its "value" as the
    value; when a display name repeats, the later entry wins.
    """
    flattened = {}
    for entry in att:
        label = entry["displayName"]
        flattened[label] = entry["value"]
    return flattened
# dist_attr = ads_data[0]['displayAttributes']
# pprint(dist_attr)
# pprint(simplify_display_attributes(dist_attr))

def simplify_dealer_attributes(att: dict) -> dict:
    """Keep a fixed subset of dealer fields, prefixing each kept key with "dealer_"."""
    wanted_fields = (
        "establishedYear",
        "franchiseCount",
        "franchiseType",
        "franchisesDisplay",
        "latitude",
        "longitude",
        "name",
        "totalAds",
    )
    trimmed = {}
    for field, value in att.items():
        if field not in wanted_fields:
            continue
        trimmed[f"dealer_{field}"] = value
    return trimmed
# dealer_attr = ads_data[0]['dealer']
# pprint(dealer_attr)
# pprint(simplify_dealer_attributes(dealer_attr))

def convert_price_toEUR(price: str, currency: str, GBP_ex_rate: float = 1.1311222) -> float:
    """Parse a price string and express it as a whole number of euro.

    Args:
        price: Price as text; thousands separators (",") are stripped.
        currency: "EUR" or "GBP".
        GBP_ex_rate: Multiplier applied to GBP amounts.

    Returns:
        The amount as an integer; GBP amounts are converted and rounded up.

    Raises:
        ValueError: If ``currency`` is neither "EUR" nor "GBP".
    """
    amount = int(price.replace(",", ""))

    if currency == "EUR":
        return amount

    if currency != "GBP":
        raise ValueError(f"Unknown Currency detected: {currency}")

    return math.ceil(amount * GBP_ex_rate)
# currencies = set([ad['currency'] for ad in clean_ad_data])
# print(currencies)
# 
# ad = ads_data[0]
# print(f"Currency: {ad['currency']}")
# print(f"Price: {ad['price']}")
# ad['price_EUR'] = convert_price_toEUR(ad['price'], ad['currency'])
# print(f"Price: €{ad['price_EUR']}")


def convert_mileage_tokm(mileage_str: str) -> float:
    """Convert a mileage string to kilometres, rounded up to the next 100.

    Args:
        mileage_str: Text ending in a three-character unit suffix, such as
            "120,000 km" or "75,000 mi" (assumed format — TODO confirm against
            the API). Thousands separators (",") are allowed.

    Returns:
        The distance in kilometres as an integer multiple of 100, or -1 when
        ``mileage_str`` is empty.
    """
    if not mileage_str:
        return -1

    # The unit is the LAST two characters of the string.
    if mileage_str.endswith("mi"):
        multiplier = 1.60934  # kilometres per mile
    else:
        multiplier = 1

    # Drop the three trailing characters (separator plus two-letter unit).
    distance = int(mileage_str[:-3].replace(",", ""))

    # If the user has given a value of 120 miles, they mean 120,000 miles
    # If mileage is over 1 million, divide by 10
    if distance < 1000:
        distance = distance * 1000
    elif distance > 1000000:
        distance = distance // 10

    return math.ceil((distance * multiplier) / 100) * 100
# mileage = clean_ad_data[0]['Mileage']
# pprint(mileage)
# print(convert_mileage_tokm(mileage))

def clean_ad(ad: dict) -> dict:
    """Reduce a raw ad record to the selected fields and normalise its values.

    The result keeps only keys listed in ``selected_keys``, promotes every
    display attribute to a top-level key, replaces the nested dealer record
    with ``dealer_*`` fields plus a boolean "dealer" flag, and adds
    "Price [EUR]" and "Mileage [km]". "Power" is parsed to an int after its
    last three characters are dropped (presumably a unit suffix — TODO confirm).

    Raises:
        KeyError: If "displayAttributes", "price", "currency", "Mileage" or
            "Power" is absent after filtering/flattening.
    """
    cleaned = {}
    for field, value in ad.items():
        if field in selected_keys:
            cleaned[field] = value

    # Promote the display attributes to top-level keys, then drop the raw list.
    cleaned.update(simplify_display_attributes(cleaned["displayAttributes"]))
    cleaned.pop("displayAttributes", None)

    # Replace the nested dealer record (when there is one) by flat fields and a flag.
    try:
        dealer_fields = simplify_dealer_attributes(cleaned["dealer"])
    except KeyError:
        cleaned["dealer"] = False
    else:
        del cleaned["dealer"]
        cleaned.update(dealer_fields)
        cleaned["dealer"] = True

    cleaned['Price [EUR]'] = convert_price_toEUR(cleaned['price'], cleaned['currency'])

    cleaned['Mileage [km]'] = convert_mileage_tokm(cleaned['Mileage'])
    cleaned['Power'] = int(cleaned['Power'][:-3])
    # The raw mileage text is superseded by 'Mileage [km]'.
    del cleaned["Mileage"]

    return cleaned


# params = gen_params_str()
# ads_data = getAdsData(params)["ads"]
# clean_ad_data = [clean_ad(ad) for ad in ads_data]

def remove_repeated_ids(ad_data):
    """Remove ads whose 'id' repeats, keeping the LAST occurrence of each id.

    The list is modified in place (relative order of the survivors is kept)
    and the same list object is returned. Ids must be hashable.
    """
    seen_ids = set()
    kept_reversed = []
    # Walk from the end so the last occurrence of an id is the one retained.
    for ad in reversed(ad_data):
        if ad['id'] not in seen_ids:
            seen_ids.add(ad['id'])
            kept_reversed.append(ad)
    # Slice assignment keeps the caller's list object while replacing its content.
    ad_data[:] = reversed(kept_reversed)
    return ad_data

def robust_unique_id_check(ad_data, VERBOSE=True):
    """Ensure every ad in ``ad_data`` has a unique 'id', removing repeats if needed.

    Args:
        ad_data: List of ad dicts, each with an 'id' key. It is modified in
            place when repeats are found.
        VERBOSE: When true, print what was found and done.

    Returns:
        ``ad_data`` with repeated ids removed.

    Raises:
        ValueError: If repeated ids are still present after removal.
    """
    unique_ids = {ad['id'] for ad in ad_data}
    num_repeated_entries = len(ad_data) - len(unique_ids)

    if num_repeated_entries == 0:
        if VERBOSE: print("No repeated entries found")
        return ad_data

    if VERBOSE: print(f"{num_repeated_entries} repeated entries found of {len(ad_data)} entries pulled")
    ad_data = remove_repeated_ids(ad_data)

    # Compare the NUMBER of distinct ids with the number of ads.
    if len({ad['id'] for ad in ad_data}) != len(ad_data):
        raise ValueError("Repeated ids remaining after cleaning")

    if VERBOSE: print("Repeated ids removed. Remaining ids confirmed to be unique")
    return ad_data



In [None]:
def save_to_timestamped_json(data, fname:str) -> str:
    """Write ``data`` as JSON to "<fname>_<yymmdd-HHMMSS>.json" and return that path.

    The timestamp is the local time at the moment of the call.
    """
    stamp = datetime.datetime.now().strftime('%y%m%d-%H%M%S')
    out_path = f"{fname}_{stamp}.json"
    with open(out_path, "w") as handle:
        json.dump(data, handle)
    return out_path

In [None]:
def collect_DoneDeal_Data():
    """Page through the search API, clean every ad, and return the de-duplicated list.

    Paging stops when a page is identical to the previous one, or when a page
    holds fewer than ``num_results`` ads (taken to be the last page).

    Returns:
        List of cleaned ad dicts with unique ids.

    Raises:
        ValueError: Propagated from the uniqueness check if repeated ids
            cannot be removed.
    """
    num_results = 30
    start_index = 0
    ad_data = []
    # Only the previous page is needed to detect a repeated response.
    previous_page = None

    while True:
        params = gen_params_str(start_index=start_index, num_results=num_results)
        page = getAdsData(params)["ads"]

        if previous_page is not None and page == previous_page:
            print("EXIT: NewData is same as previous data")
            break

        ad_data.extend(clean_ad(ad) for ad in page)

        # A short page means there is nothing further to request.
        if len(page) < num_results:
            print(f"EXIT: Less than {num_results} items remaining indicating this is the last page")
            break

        previous_page = page
        start_index += num_results

    return robust_unique_id_check(ad_data, VERBOSE=True)

In [None]:
def main() -> None:
    """Load a previously saved search result and report its size and id uniqueness."""
    # Live collection is currently switched off; uncomment to query the API
    # and write a fresh timestamped snapshot instead of reading a saved one.
    # ad_data = collect_DoneDeal_Data()
    # print(f"Completed collecting data. {len(ad_data)} entries found")

    # fout = save_to_timestamped_json(ad_data, "ad_data")
    # print(f"Search results written to {fout}")

    # Snapshot to read, relative to the current working directory.
    fname = 'ad_data_230408-213459.json'
    with open(fname, "r") as fp:
        ad_data = json.load(fp)

    all_ids_unique = len({ad['id'] for ad in ad_data}) == len(ad_data)
    print(f"Completed loading data. {len(ad_data)} entries found. Unique: {all_ids_unique}")

In [None]:
# Call main() only when this code runs as the top-level program
# (__name__ is "__main__"), not when it is imported as a module.
if __name__ == "__main__":
    main()