# Block 1: Data Ingestion
This notebook fetches discounted food-waste clearance items per ZIP code from the Salling Group API, flattens them into a pandas DataFrame, and saves the combined result to a CSV file for further analysis.


In [1]:
import json
import os

import pandas as pd
import requests


In [2]:
# The Salling Group API token is read from the environment so the secret never
# lives in the notebook or its version history. Set it before starting Jupyter:
#   export SALLING_API_TOKEN="<your token>"
# NOTE(review): the token that used to be hardcoded in this cell should be
# treated as leaked and revoked/rotated.
token = os.environ.get("SALLING_API_TOKEN", "")
if not token:
    print("⚠️ SALLING_API_TOKEN is not set; API requests will not be authorized.")

# Bearer-token header sent with every API request.
headers = {
    "Authorization": f"Bearer {token}"
}


In [None]:

def fetch_food_waste_data(zip_code, timeout=10):
    """Fetch food-waste clearance data for one ZIP code from the Salling Group API.

    Args:
        zip_code: ZIP code to query; sent as the ``zip`` query parameter.
        timeout: Seconds to wait for the server before giving up (default 10).

    Returns:
        The decoded JSON payload. When the API answers with a list, only its
        first element is returned, and an empty list yields ``None``.
        ``None`` is also returned (after printing a message) on a network
        error or on any non-200 status code.
    """
    url = "https://api.sallinggroup.com/v1/food-waste/"
    try:
        # Let requests encode the query string so the ZIP code is actually
        # substituted into the request instead of a literal placeholder.
        response = requests.get(
            url,
            headers=headers,
            params={"zip": zip_code},
            timeout=timeout,  # without a timeout a stalled connection hangs the cell
        )
    except requests.RequestException as exc:
        print(f"Error fetching for zip {zip_code}: {exc}")
        return None

    if response.status_code != 200:
        print(f"Error fetching for zip {zip_code}: {response.status_code}")
        return None

    api_data = response.json()
    if isinstance(api_data, list):
        # create_products_dataframe expects a single dict with a "clearances"
        # key, so only the first entry is kept; an empty list becomes None
        # rather than raising IndexError.
        # NOTE(review): entries after the first are discarded — confirm this
        # is intended.
        return api_data[0] if api_data else None
    return api_data


In [9]:
# Sanity check: pull the payload for a single ZIP code and pretty-print it so
# the response structure can be inspected before building the DataFrame.
api_data = fetch_food_waste_data(zip_code=2400)
print(json.dumps(obj=api_data, indent=2))


⚠️ API request failed with status code 401
{}


In [None]:
import pandas as pd

def create_products_dataframe(api_data, zip_code):
    """Flatten the clearance items of one API payload into a DataFrame.

    Args:
        api_data: Mapping with a "clearances" list; each entry is read through
            its "product" and "offer" sub-mappings.
        zip_code: Value stored in the "zip_code" column of every row.

    Returns:
        A DataFrame with one row per clearance item that could be parsed.
        Columns: zip_code, the four category levels (broad, least, mid, fine),
        final_category, description, original_price, new_price and discount.
        If ``api_data`` is falsy or has no "clearances" key, a message is
        printed and an empty DataFrame without columns is returned.

    The category string (English preferred, Danish as fallback) is split on
    ">" and its first four segments fill broad/least/mid/fine in that order;
    missing segments become "Unknown". ``final_category`` is the deepest
    level that is not "Unknown". Items that raise while being parsed are
    reported and skipped.
    """
    has_clearances = bool(api_data) and "clearances" in api_data
    if not has_clearances:
        print(f"⚠️ No clearances for ZIP {zip_code}")
        return pd.DataFrame()

    rows = []
    for clearance in api_data["clearances"]:
        try:
            product_info = clearance.get("product", {})
            offer_info = clearance.get("offer", {})

            category_names = product_info.get("categories", {})
            category_path = (
                category_names.get("en")
                or category_names.get("da")
                or "Unknown>Unknown"
            )

            # Keep at most four stripped segments and pad the rest with "Unknown".
            levels = [segment.strip() for segment in category_path.split(">")[:4]]
            levels.extend(["Unknown"] * (4 - len(levels)))
            broad, least, mid, fine = levels

            # Deepest known level wins: fine, then mid, then least, then broad.
            final = next(
                (level for level in reversed(levels) if level != "Unknown"),
                "Unknown",
            )

            row = {
                "zip_code": zip_code,
                "broad_category": broad,
                "least_category": least,
                "mid_category": mid,
                "fine_category": fine,
                "final_category": final,
                "description": product_info.get("description", "Unknown"),
                "original_price": offer_info.get("originalPrice"),
                "new_price": offer_info.get("newPrice"),
                "discount": offer_info.get("discount"),
            }
        except Exception as err:
            print(f"⚠️ Skipping one item due to error: {err}")
        else:
            rows.append(row)

    return pd.DataFrame(rows)


In [None]:
# ZIP codes currently excluded from the run: 8000, 2400, 2000, 2100, 2200, 2300, 2500, 2600
zip_codes = [2700, 2800, 3100, 3000]

# Build one DataFrame per ZIP code, then stack them into a single table.
all_products = []
for zip_code in zip_codes:
    api_data = fetch_food_waste_data(zip_code)
    df = create_products_dataframe(api_data, zip_code)
    all_products += [df]

df_all_products = pd.concat(objs=all_products, ignore_index=True)

# Show the first five rows as a quick check of the combined table.
print(df_all_products.head(n=5))


In [None]:
# Persist the combined table next to the notebook, without the row index column.
df_all_products.to_csv(path_or_buf="food_waste_products_combined.csv", index=False)
print("CSV file saved!")
