# Mercadona API Data Extraction

### About the project
##### *This project aims to conduct a comprehensive analysis and comparison of supermarket prices.*
The primary focus of this notebook is extracting data from Mercadona's API server. The extraction starts by querying the general categories endpoint and then iterates over the category-specific endpoints to obtain detailed information about the individual products. The notebook documents this systematic approach and gives an overview of the range of products in Mercadona's inventory and their prices. The ultimate goal is to derive meaningful comparisons and trends in supermarket pricing, contributing to a deeper understanding of market dynamics.

In [58]:
import pandas as pd
import requests 
import json 

In [59]:
url = "https://tienda.mercadona.es/api/categories/"
# Fetch the top-level category tree. The timeout keeps the cell from hanging
# indefinitely on a stalled connection, and raise_for_status() reports an
# HTTP error here rather than as a JSON/KeyError in a later cell.
response = requests.get(url, timeout=30)
response.raise_for_status()
# Keep the Response and the decoded payload under different names so that
# `data` always refers to the parsed JSON.
data = response.json()

In [60]:
data_res = data["results"]

# Flatten the two-level category tree into three parallel columns: one entry
# per second-level category, tagged with the name of its parent category.
dc_mercadona = {"cat1": [], "cat2": [], "id": []}
for d_cat1 in data_res:
    for d_cat2 in d_cat1["categories"]:
        # Read all three fields before appending so a missing key cannot
        # leave the columns with different lengths. The id is kept in
        # `category_id` rather than a global called `id`, which would shadow
        # the builtin for the rest of the kernel session.
        parent_name = d_cat1["name"]
        category_name = d_cat2["name"]
        category_id = d_cat2["id"]

        dc_mercadona["cat1"].append(parent_name)
        dc_mercadona["cat2"].append(category_name)
        dc_mercadona["id"].append(category_id)


In [61]:
# Category lookup table with the columns cat1, cat2 and id collected above.
df_cat = pd.DataFrame(dc_mercadona)
# Optional export of the category table (currently disabled):
# df_cat.to_csv("../data/dynamic/mercadona/categories.csv", index=False)


In [62]:
# Probe a single category endpoint to see which top-level fields its payload
# exposes before writing the extraction helper.
url2 = "https://tienda.mercadona.es/api/categories/112"
# Bound the wait and fail loudly on an HTTP error instead of trying to parse
# an error page as JSON.
endpoint_response = requests.get(url2, timeout=30)
endpoint_response.raise_for_status()
data_endpoint = endpoint_response.json()
print(data_endpoint.keys())


dict_keys(['id', 'name', 'order', 'layout', 'published', 'categories', 'is_extended'])


In [63]:
def get_product_by_id(id):
    """Fetch one category from the Mercadona API and tabulate its products.

    Parameters
    ----------
    id : int or str
        Category id interpolated into the ``/api/categories/{id}`` URL. The
        parameter name shadows the builtin ``id`` but is kept unchanged so
        existing callers keep working.

    Returns
    -------
    pandas.DataFrame
        One row per product listed under the category's sub-categories, with
        the columns ``id`` (the category id reported by the API), ``name``,
        ``packaging``, ``unit_price``, ``iva``, ``unit_size``,
        ``size_format`` and ``price_decreased``. All columns are present even
        when the category contains no products.

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status code.
    KeyError
        If the payload lacks one of the expected fields.
    """
    # The timeout stops a stalled connection from blocking the whole crawl;
    # raise_for_status() turns an HTTP error into an explicit exception
    # instead of a confusing JSON decode error or KeyError below.
    response = requests.get(
        f"https://tienda.mercadona.es/api/categories/{id}", timeout=30
    )
    response.raise_for_status()
    payload = response.json()

    # Fields copied verbatim from each product's "price_instructions" object.
    price_fields = ("unit_price", "iva", "unit_size", "size_format", "price_decreased")
    table = {column: [] for column in ("id", "name", "packaging", *price_fields)}

    for subcategory in payload["categories"]:
        for product in subcategory["products"]:
            pricing = product["price_instructions"]
            # Every row carries the id of the category being queried, which
            # is the key later used to join products with category names.
            table["id"].append(payload["id"])
            table["name"].append(product["display_name"])
            table["packaging"].append(product["packaging"])
            for field in price_fields:
                table[field].append(pricing[field])

    return pd.DataFrame(table)


In [64]:
# Spot-check the helper on a single category (id 27) before crawling them all.
get_product_by_id(27)

Unnamed: 0,id,name,packaging,unit_price,iva,unit_size,size_format,price_decreased
0,27,Plátano de Canarias IGP,Pieza,0.32,0,0.16,kg,False
1,27,Banana,Pieza,0.26,0,0.18,kg,False
2,27,Plátano macho,Pieza,0.95,0,0.33,kg,False
3,27,Uva blanca sin semillas,Bandeja,2.55,0,0.5,kg,False
4,27,Uva roja sin semillas,Bandeja,2.55,0,0.5,kg,False
5,27,Manzana Golden,Pieza,0.44,0,0.21,kg,False
6,27,Manzanas Golden,Bolsa,2.84,0,1.5,kg,False
7,27,Pera Conferencia,Pieza,0.45,0,0.18,kg,False
8,27,Peras Conferencia,Bandeja,2.99,0,1.5,kg,False
9,27,Manzanas rojas dulces,Bolsa,3.59,0,1.5,kg,False


In [74]:
from tqdm import tqdm

# Download the product table of every distinct category id, one request per
# id, showing a progress bar. There is no per-id error handling: the first
# failing request aborts the loop and leaves the frames gathered so far in
# the list.
category_ids = df_cat["id"].unique()
df_mercadona_segunda = []
for category_id in tqdm(category_ids, desc="Processing Products"):
    category_frame = get_product_by_id(category_id)
    df_mercadona_segunda.append(category_frame)



Processing Products: 100%|██████████| 152/152 [00:23<00:00,  6.54it/s]


In [76]:
# Stack the per-category frames into one products table. ignore_index=True
# renumbers the rows consecutively instead of repeating each frame's own index.
df_md_prod = pd.concat(df_mercadona_segunda, ignore_index=True)

In [77]:
# Spot check: show the rows whose product name contains "Quitagrasas".
df_md_prod[df_md_prod["name"].str.contains("Quitagrasas")]

Unnamed: 0,id,name,packaging,unit_price,iva,unit_size,size_format,price_decreased
3773,230,Quitagrasas Bosque Verde perfume Marsella,Botella,1.4,21,0.75,l,False
3774,230,Quitagrasas sin espuma Bosque Verde perfume limón,Botella,1.45,21,0.75,l,False
3775,230,Quitagrasas KH-7,Botella,3.7,21,0.78,l,False


In [78]:
# Number of distinct category ids that contributed at least one product row.
df_md_prod["id"].nunique()

152

In [79]:
# Count the missing values in each column of the products table.
df_md_prod.isnull().sum()

id                   0
name                 0
packaging          469
unit_price           0
iva                  0
unit_size           90
size_format          0
price_decreased      0
dtype: int64

In [80]:
# Inspect the products for which the API returned no packaging value.
df_md_prod[df_md_prod["packaging"].isnull()]

Unnamed: 0,id,name,packaging,unit_price,iva,unit_size,size_format,price_decreased
699,92,Huevo de chocolate Hacendado,,1.40,10,0.050,kg,False
700,92,Huevo crema al cacao,,1.25,10,0.025,kg,False
781,216,Papilla fresa y plátano +8 meses Hacendado,,0.90,10,0.120,kg,False
782,216,Papilla fruta variada +6 meses Hacendado,,0.90,10,0.120,kg,False
783,216,Papilla plátano y manzana +6 meses Hacendado,,0.90,10,0.120,kg,False
...,...,...,...,...,...,...,...,...
4866,142,Brioche Listo para Comer,,3.00,10,0.160,kg,False
4875,105,Bífidus probiótico con ciruelas pasas Hacendado,,1.30,10,0.500,kg,False
5020,107,Postre lácteo infantil de fresa y plátano Hace...,,0.85,10,0.100,kg,False
5021,107,Postre lácteo infantil de pera Hacendado +8 meses,,0.85,10,0.100,kg,False


In [81]:
# df_md_prod.to_csv("../data/dynamic/mercadona/products.csv", index=False)

In [82]:
# Attach the category names (cat1, cat2) to every product through the shared
# category id. The default inner join keeps only ids present in both tables.
df_mercadona_dyn = pd.merge(df_cat, df_md_prod, on="id")
# index=False: the row index is only a positional counter, and writing it
# would add an unnamed first column to the CSV. This matches the index=False
# used by the other (disabled) exports in this notebook.
df_mercadona_dyn.to_csv("../data/dynamic/mercadona/final_table_1102.csv", index=False)
