Our LOD integration with AGROVOC allows us to efficently associate the terms and concepts that we study to URIs provided by an authorative source.

In [2]:
import pandas as pd
from SPARQLWrapper import SPARQLWrapper, JSON
from sentence_transformers import SentenceTransformer, util
import pandas as pd
from pprint import pprint

In [4]:
# Load sentence transformer for semantic matching
model = SentenceTransformer('all-MiniLM-L6-v2')

# Setup SPARQL endpoint
sparql = SPARQLWrapper("http://agrovoc.fao.org/sparql")

def query_agrovoc(value):
    query = f"""
    PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
    SELECT ?concept ?label WHERE {{
        ?concept skos:prefLabel ?label .
        FILTER(LANG(?label) = "en" && REGEX(?label, "{value}", "i"))
    }}
    """
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()
    matches = []

    for result in results["results"]["bindings"]:
        label = result.get("label", {}).get("value")
        concept = result.get("concept", {}).get("value")
        
        if label and concept:
            matches.append((label, concept))
                
    return matches


def best_match(value, candidates):
    if not candidates:
        return None, None
    input_embedding = model.encode(value, convert_to_tensor=True)
    candidate_texts = [label for label, _ in candidates]
    candidate_embeddings = model.encode(candidate_texts, convert_to_tensor=True)
    scores = util.cos_sim(input_embedding, candidate_embeddings)[0]
    best_idx = scores.argmax().item()
    return candidates[best_idx]

def int_df_agrovoc(df, column):
    agrovoc_labels = []
    agrovoc_uris = []

    for val in df[column]:
        matches = query_agrovoc(val)
        best_label, best_uri = best_match(val, matches)
        agrovoc_labels.append(best_label)
        agrovoc_uris.append(best_uri)

    df["AGROVOC_label"] = agrovoc_labels
    df["AGROVOC_uri"] = agrovoc_uris
    return df


- the EFSA (European Food Safety Authority) food consumption data for data about the consumption of different types of food in various countries

In [None]:
food_consumption_data = pd.read_csv("data/chronic_consumption_gday_allsubjects.csv", encoding="utf-16")
# food_consumption_data.head(5)
# food_consumption_data_agro = int_df_agrovoc(food_consumption_data, "Exposure hierarchy (L3)")

### How much water is used to produce food?
- the FAO AQUASTAT database for information about worldwide water withdrawal


In [None]:
# Load the data on worldwide water withdrawals
world_water_data = pd.read_csv("data/aquastat_world_water_data.csv", encoding="utf-8")


In [None]:
# Define the variables of interest
variables_of_interest = [
    "Agricultural water withdrawal",
    "Industrial water withdrawal",
    "Municipal water withdrawal"
]
# Filter the dataframe
filtered_water = world_water_data[
    (world_water_data["Variable"].isin(variables_of_interest)) &
    (world_water_data["Year"] == 2022) &
    (world_water_data["Area"] == "World")
]

filtered_water

filtered_water_agro = int_df_agrovoc(filtered_water, "Variable")



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["AGROVOC_label"] = agrovoc_labels
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["AGROVOC_uri"] = agrovoc_uris


In [None]:
filtered_water_agro

Unnamed: 0,VariableGroup,Subgroup,Variable,Area,Year,Value,Unit,Symbol,IsAggregate,AGROVOC_label,AGROVOC_uri
482,Water use,Water withdrawal by sector,Agricultural water withdrawal,World,2022,2851.590176,10^9 m3/year,E,True,,
2414,Water use,Water withdrawal by sector,Industrial water withdrawal,World,2022,609.89392,10^9 m3/year,E,True,,
4741,Water use,Water withdrawal by sector,Municipal water withdrawal,World,2022,527.233048,10^9 m3/year,E,True,,


### How much land is it used for food production?
- land use data from FAOSTAT

In [11]:

landuse_data = pd.read_csv("data/FAOSTAT_land_use_data.csv", encoding="utf-8")

filtered_landuse = landuse_data[
    (landuse_data["Area"] == "World") &
    (landuse_data["Year"] == 2018) &
    (landuse_data["Element"] == "Area") &
    (landuse_data["Item"].isin(["Country land",
                                "Land area", 
                                "Agriculture", 
                                "Forest land", 
                                "Land used for aquaculture", 
                                "Inland waters used for aquac. or holding facilities", 
                                "Inland waters used for capture fishes", 
                                "Coastal waters used for aquac. or holding facilities",
                                "Coastal waters used for capture fishes",
                                "EEZ used for aquac. or holding facilities",
                                "EEZ used for capture fishes"]))
]

filtered_landuse = filtered_landuse[["Area", "Year", "Item", "Value"]]
filtered_landuse

Unnamed: 0,Area,Year,Item,Value
44,World,2018,Land area,13031520.0
69,World,2018,Agriculture,4841329.0
532,World,2018,Forest land,4068924.0


In [None]:
filtered_landuse_agro = int_df_agrovoc(filtered_landuse, "Item")
filtered_landuse_agro

Unnamed: 0,Area,Year,Item,Value,AGROVOC_label,AGROVOC_uri
44,World,2018,Land area,13031520.0,land area,http://aims.fao.org/aos/agrovoc/c_298db0d3
69,World,2018,Agriculture,4841329.0,agriculture,http://aims.fao.org/aos/agrovoc/c_203
532,World,2018,Forest land,4068924.0,forest land,http://aims.fao.org/aos/agrovoc/c_24843


### What are the emissions of the agricultural sector compared to other sectors?
- greenhouse gas emissions data from Climate Watch

In [14]:
# Load the dataset on greehhouse gas emissions
ghg_emissions_data = pd.read_csv("data/historical_emissions.csv", encoding="utf-8")


### What are the emissions of the different processes carried out in the food sector?
- emissions from FAOSTAT