# Block 1: Data Ingestion
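This notebook fetches real-time discounted (food-waste clearance) items from the Salling Group API, flattens them into a pandas DataFrame with one row per item, and saves the result to `food_waste_products_combined.csv` for further analysis.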


In [1]:
import json
import os

import pandas as pd
import requests


In [2]:
# API token for the Salling Group API, read from the environment so the secret
# never ends up in the notebook or its version history.
# Set it before starting Jupyter, e.g.:  export SALLING_API_TOKEN="<your token>"
# NOTE(review): a token was previously hardcoded in this cell; treat it as
# leaked and rotate it.
token = os.environ.get("SALLING_API_TOKEN", "")
if not token:
    print("⚠️ SALLING_API_TOKEN is not set; API requests will be sent without a valid token")

# Authorization header sent with every API request.
headers = {
    "Authorization": f"Bearer {token}"
}


In [3]:
def fetch_food_waste_data(zip_code, timeout=10):
    """Fetch food-waste clearance data for one ZIP code from the Salling Group API.

    The request is authorized with the module-level ``headers`` dict.

    Args:
        zip_code: ZIP code to query; sent as the ``zip`` query parameter.
        timeout: Seconds to wait for the server before giving up, so a stalled
            connection cannot hang the notebook. Passed to ``requests.get``.

    Returns:
        A list of payload entries. A non-empty JSON list is returned as is and
        a JSON object is wrapped in a one-element list. An empty list is
        returned (after printing a warning) for an empty or unexpected payload,
        a non-200 status code, a failed request, or a body that is not JSON.
    """
    url = "https://api.sallinggroup.com/v1/food-waste/"

    try:
        # ``params`` lets requests build and URL-encode the query string.
        response = requests.get(
            url, headers=headers, params={"zip": zip_code}, timeout=timeout
        )
    except requests.RequestException as e:
        # Connection problems, timeouts, invalid URLs, ...
        print(f"⚠️ Error making API request: {e}")
        return []

    if response.status_code != 200:
        print(f"⚠️ API request failed with status code {response.status_code}")
        return []

    try:
        api_data = response.json()
    except ValueError as e:
        # The body was not valid JSON.
        print(f"⚠️ Error making API request: {e}")
        return []

    if isinstance(api_data, list) and api_data:
        return api_data
    if isinstance(api_data, dict):
        # Normalize a single object to the list shape callers iterate over.
        return [api_data]

    print("⚠️ Empty or unrecognized format received from API")
    return []


In [4]:
# Fetch the raw payload for ZIP code 2400 and pretty-print it to inspect its structure.
api_data = fetch_food_waste_data(2400)
api_data_pretty = json.dumps(api_data, indent=2)
print(api_data_pretty)


[
  {
    "clearances": [
      {
        "offer": {
          "currency": "DKK",
          "discount": 7.95,
          "ean": "5712580948928",
          "endTime": "2025-04-30T21:59:59.000Z",
          "lastUpdate": "2025-04-29T14:20:34.000Z",
          "newPrice": 8,
          "originalPrice": 15.95,
          "percentDiscount": 49.84,
          "startTime": "2025-04-29T05:37:49.000Z",
          "stock": 2,
          "stockUnit": "each"
        },
        "product": {
          "categories": {
            "da": "Br\u00f8d & kager>Rugbr\u00f8d>Kernerugbr\u00f8d",
            "en": "Bread And Cakes>Rye Breads>Seeded Rye Breads"
          },
          "description": "KERNERUGBR\u00d8D \u00d8GO",
          "ean": "5712872401629",
          "image": "https://digitalassets.sallinggroup.com/image/upload/e_trim/c_limit,e_sharpen:80,f_auto,q_auto,w_400,h_400/3225f60fdf5caeb7a5d6b11962ce5081"
        }
      },
      {
        "offer": {
          "currency": "DKK",
          "discount": 5,
  

In [5]:
def create_products_dataframe(api_data_list, zip_code, max_splits=10):
    """Flatten food-waste API payloads into one DataFrame row per clearance item.

    Args:
        api_data_list: List of store entries. Each entry is read as a dict with
            optional ``"store"`` and ``"clearances"`` keys; each clearance is
            read as a dict with optional ``"product"`` and ``"offer"`` keys.
        zip_code: ZIP code the data was fetched for; copied into every row.
        max_splits: Number of ``category1`` ... ``categoryN`` columns to create.
            The ``>``-separated category path is split across them and padded
            with empty strings.

    Returns:
        A DataFrame with the columns ``zip_code``, ``store_name``,
        ``store_street``, ``description``, ``original_price``, ``new_price``,
        ``discount``, ``category1`` ... ``category{max_splits}`` and
        ``final_category``. An empty, column-less DataFrame is returned when
        ``api_data_list`` is empty (a warning is printed) or when no store has
        any clearance items. Items that raise while being processed are skipped
        with a printed warning.
    """
    if not api_data_list:
        print(f"⚠️ No data returned for ZIP {zip_code}")
        return pd.DataFrame()

    products = []

    for store_data in api_data_list:
        # ``or`` treats an explicit JSON null like a missing key, so a store
        # without details gets the placeholders instead of raising.
        clearances = store_data.get("clearances") or []
        store_info = store_data.get("store") or {}
        store_address = store_info.get("address") or {}
        store_name = store_info.get("name", "Unknown Store")
        store_street = store_address.get("street", "Unknown Street")

        for item in clearances:
            try:
                product = item.get("product") or {}
                offer = item.get("offer") or {}

                # Prefer the English category path, fall back to Danish.
                categories = product.get("categories") or {}
                category_full = categories.get("en") or categories.get("da") or ""

                # Split the path into levels and pad to at least max_splits
                # entries (a negative pad count adds nothing).
                parts = [p.strip() for p in category_full.split(">")]
                parts += [""] * (max_splits - len(parts))

                row_data = {
                    "zip_code": zip_code,
                    "store_name": store_name,
                    "store_street": store_street,
                    "description": product.get("description", ""),
                    "original_price": offer.get("originalPrice"),
                    "new_price": offer.get("newPrice"),
                    "discount": offer.get("discount"),
                }

                # Assign category1 through categoryN
                for i in range(max_splits):
                    row_data[f"category{i+1}"] = parts[i]

                # final_category is the last non-empty level of the path,
                # or "" when the item has no category at all.
                row_data["final_category"] = next(
                    (cat for cat in reversed(parts) if cat != ""), ""
                )

                products.append(row_data)

            except Exception as e:
                # Best effort: one malformed item must not discard the rest.
                print(f"⚠️ Skipping item due to error: {e}")
                continue

    return pd.DataFrame(products)


In [6]:
# ZIP code list kept for reference — presumably candidates for a wider run
# (only 2400 is queried below): 8000, 2400, 2000, 2100, 2200, 2300, 2500, 2600
zip_codes = [2400]

# One DataFrame per ZIP code, combined after the loop.
all_products = [] 

# Fetch and flatten each ZIP code separately.
for zip_code in zip_codes:
    api_data = fetch_food_waste_data(zip_code)
    df = create_products_dataframe(api_data, zip_code)
    all_products.append(df)

# Stack the per-ZIP frames into one table with a fresh 0..n-1 index.
df_all_products = pd.concat(all_products, ignore_index=True)


print(df_all_products.head())


   zip_code       store_name   store_street                    description  \
0      2400  Netto Emdrupvej  Emdrupvej 107               KERNERUGBRØD ØGO   
1      2400  Netto Emdrupvej  Emdrupvej 107  JUICE HINDBÆR SOLBÆR INNOCENT   
2      2400  Netto Emdrupvej  Emdrupvej 107           MANGO/PASS LØGISMOSE   
3      2400  Netto Emdrupvej  Emdrupvej 107            KRABBESALAT K-SALAT   
4      2400  Netto Emdrupvej  Emdrupvej 107           SKOVBÆR DRIK ACTIMEL   

   original_price  new_price  discount               category1  \
0           15.95          8      7.95         Bread And Cakes   
1           20.00         15      5.00               Beverages   
2           15.00          6      9.00  Dairy And Cold Storage   
3           19.95         10      9.95  Dairy And Cold Storage   
4           20.00         12      8.00  Dairy And Cold Storage   

         category2                     category3  \
0       Rye Breads             Seeded Rye Breads   
1  Juice Smoothies            

In [7]:
# Persist the combined table to disk (row index omitted) and confirm.
csv_path = "food_waste_products_combined.csv"
df_all_products.to_csv(csv_path, index=False)
print("CSV file saved!")


CSV file saved!
