In [2]:
# Re-import necessary libraries
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# Set seed for reproducibility
np.random.seed(42)

# Number of rows
num_samples = 20000

# Generate random dates in the last 5 years
start_date = datetime(2019, 1, 1)
date_list = [start_date + timedelta(days=np.random.randint(0, 1825)) for _ in range(num_samples)]

# Food Items (Common donated food types)
food_items = ["Rice", "Pasta", "Milk", "Vegetables", "Fruits", "Bread", "Meat", "Fish", "Eggs", "Canned Food"]
food_item = np.random.choice(food_items, num_samples)

# Quantity Donated & Requested
quantity_donated = np.random.randint(10, 500, num_samples)
quantity_requested = quantity_donated + np.random.randint(-100, 100, num_samples)  # Some variance

# Expiration Date (Within 1-365 days from donation date)
expiration_days = np.random.randint(1, 365, num_samples).astype(int)
expiration_date = [date_list[i] + timedelta(days=int(expiration_days[i])) for i in range(num_samples)]

# Seasonality (Higher impact on fruits & vegetables)
seasonality = np.random.uniform(0.5, 1.5, num_samples) * (food_item == "Fruits") + np.random.uniform(0.8, 1.2, num_samples)

# Holidays (Random binary indicator)
holidays = np.random.choice([0, 1], num_samples)

# Market Prices (Higher for scarce items)
market_prices = np.random.uniform(1, 20, num_samples) * seasonality

# Disaster Events (0-1: No, 1-5: Severity of disaster)
disaster_events = np.random.choice([0, 1, 2, 3, 4, 5], num_samples, p=[0.8, 0.1, 0.05, 0.03, 0.015, 0.005])

# Recipient Preferences (Scale: 1-10)
recipient_preferences = np.random.randint(1, 11, num_samples)

# Delivery Time (Hours, influenced by distance)
distance_traveled = np.random.uniform(5, 500, num_samples)  # Distance in km
delivery_time = distance_traveled / np.random.uniform(30, 80, num_samples)  # Speed factor

# Food Waste Rate (Higher for perishables)
food_waste_rate = np.random.uniform(0.01, 0.2, num_samples) * (food_item == "Milk") + np.random.uniform(0.01, 0.1, num_samples)

# Transportation Mode & Fuel Type
transport_modes = ["Truck", "Electric Vehicle", "Bicycle", "Train", "Drone"]
fuel_types = {"Truck": "Diesel", "Electric Vehicle": "Electric", "Bicycle": "None", "Train": "Diesel", "Drone": "Electric"}

transport_mode = np.random.choice(transport_modes, num_samples)
fuel_type = [fuel_types[mode] for mode in transport_mode]

# Storage Duration (days)
storage_duration = np.random.randint(1, 30, num_samples)

# Energy Used in Storage (Higher for refrigerated items)
refrigeration_required = np.random.choice([0, 1], num_samples, p=[0.7, 0.3])
energy_used_storage = storage_duration * np.random.uniform(0.5, 2.0, num_samples) * (1 + refrigeration_required * 1.5)

# Logistics Cost (Correlated with distance, weight, storage, and disaster events)
logistics_cost = distance_traveled * 0.5 + storage_duration * 2 + disaster_events * 10

# Packaging Type (Different packaging methods)
packaging_types = ["Plastic", "Cardboard", "Glass", "Biodegradable"]
packaging_type = np.random.choice(packaging_types, num_samples)

# Carbon Footprint Calculation (Final Target Variable)
carbon_footprint = (distance_traveled * (transport_mode == "Truck") * 0.1 +
                    energy_used_storage * 0.05 +
                    storage_duration * 0.02 +
                    food_waste_rate * 10 +
                    disaster_events * 2 +
                    np.random.uniform(0, 5, num_samples))  # Random noise

# Create DataFrame
df = pd.DataFrame({
    "Date": date_list,
    "Food Item": food_item,
    "Quantity Donated": quantity_donated,
    "Quantity Requested": quantity_requested,
    "Expiration Date": expiration_date,
    "Seasonality": seasonality,
    "Holidays": holidays,
    "Market Prices": market_prices,
    "Disaster Events": disaster_events,
    "Recipient Preferences": recipient_preferences,
    "Delivery Time": delivery_time,
    "Food Waste Rate": food_waste_rate,
    "Distance Traveled (km)": distance_traveled,
    "Transportation Mode": transport_mode,
    "Storage Duration (days)": storage_duration,
    "Energy Used in Storage (kWh)": energy_used_storage,
    "Carbon Footprint": carbon_footprint,
    "Fuel Type": fuel_type,
    "Logistics Cost": logistics_cost,
    "Packaging Type": packaging_type,
    "Refrigeration Required": refrigeration_required
})

# Save to CSV
file_path = "/mnt/data/food_carbon_footprint_dataset.csv"
df.to_csv(file_path, index=False)

# Return dataset path
file_path


OSError: Cannot save file into a non-existent directory: '\mnt\data'