In [None]:
import rdflib
from rdflib import Graph, Namespace, RDF, RDFS, OWL, URIRef, Literal

In [None]:
# The single in-memory graph that every later cell adds triples to.
g = Graph()

# Define Namespaces
# Each Namespace builds full IRIs from a short local name, e.g. ex.Nutrient
# -> <http://kg-course/nutrition#Nutrient>.
schema  = Namespace("http://schema.org/")
rdf     = Namespace("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
rdfs    = Namespace("http://www.w3.org/2000/01/rdf-schema#")
owl     = Namespace("http://www.w3.org/2002/07/owl#")
xsd     = Namespace("http://www.w3.org/2001/XMLSchema#")
# NOTE(review): this IRI uses the https:// scheme; the published Food Ontology
# namespace may be http:// — confirm before minting fo: terms.
fo      = Namespace("https://purl.org/ontology/fo/")
dbr     = Namespace("http://dbpedia.org/resource/")
ex      = Namespace("http://kg-course/nutrition#")

# Register a readable prefix for every namespace declared above so that
# serialisations use e.g. "ex:Nutrient" instead of the full IRI.
# "fo" was previously declared but never bound; it is now bound like the rest.
for prefix, namespace in (
    ("schema", schema),
    ("rdf", rdf),
    ("rdfs", rdfs),
    ("owl", owl),
    ("xsd", xsd),
    ("fo", fo),
    ("dbr", dbr),
    ("ex", ex),
):
    g.bind(prefix, namespace)

In [None]:
# ---------------------------------------------------------------------------
# Ontology schema for the nutrition knowledge graph.
# All terms live in the `ex` namespace; triples are added to the shared graph
# `g` and the result is written to nutrition_ontology.ttl at the end.
# ---------------------------------------------------------------------------

# Classes declared as direct subclasses of ex:Nutrient.
nutrient_subclass_names = [
    "Macronutrient",
    "Mineral",
    "Vitamin",
    "OtherComponent",
    "Carbohydrate",
    "Fat",
    "Omega3Fat",
    "Omega6Fat",
    "MonounsaturatedFat",
    "PolyunsaturatedFat",
    "TransFat",
    "Phytosterols",
    "EssentialAminoAcid",
    "ConditionallyEssentialAminoAcid",
    "NonEssentialAminoAcid",
]

# Remaining classes: the root nutrient class plus the supporting classes.
# FoodItem is optional and only matters when a specific food is represented.
standalone_class_names = ["Nutrient", "Measurement", "DailyValue", "FoodItem"]

# Declare every class as an owl:Class.
for class_name in standalone_class_names + nutrient_subclass_names:
    g.add((ex[class_name], RDF.type, OWL.Class))

# Class hierarchy: each listed class hangs directly under ex:Nutrient.
for class_name in nutrient_subclass_names:
    g.add((ex[class_name], RDFS.subClassOf, ex.Nutrient))

# Object properties, given as local name -> (domain class, range class).
# belongsToFoodItem is only needed if food items are modelled.
object_property_signatures = {
    "hasCategory": (ex.Nutrient, ex.Nutrient),
    "hasMeasurement": (ex.Nutrient, ex.Measurement),
    "hasDailyValue": (ex.Nutrient, ex.DailyValue),
    "belongsToFoodItem": (ex.Nutrient, ex.FoodItem),
}
for property_name, (domain_class, range_class) in object_property_signatures.items():
    object_property = ex[property_name]
    g.add((object_property, RDF.type, OWL.ObjectProperty))
    g.add((object_property, RDFS.domain, domain_class))
    g.add((object_property, RDFS.range, range_class))

# Datatype properties, given as local name -> domain class.
# Every one of them has rdfs:Literal as its range.
datatype_property_domains = {
    "name": ex.Nutrient,
    "unit": ex.Measurement,
    "value": ex.Measurement,
    "dailyPercentage": ex.DailyValue,
}
for property_name, domain_class in datatype_property_domains.items():
    datatype_property = ex[property_name]
    g.add((datatype_property, RDF.type, OWL.DatatypeProperty))
    g.add((datatype_property, RDFS.domain, domain_class))
    g.add((datatype_property, RDFS.range, RDFS.Literal))

# Save the ontology
g.serialize("nutrition_ontology.ttl", format="turtle")

In [10]:
import csv
import re

def separate_values_and_units(text):
    """Split a measurement string into ``(value, unit)`` string pairs.

    Each pair is a number (digits with an optional decimal point) followed by
    an optional unit made of ASCII letters and/or ``%``. Either half may be the
    empty string when it is absent from the text, e.g. ``"mcg"`` gives
    ``[("", "mcg")]`` and ``"96"`` gives ``[("96", "")]``.

    Spaces or tabs between the number and its unit are tolerated, so
    ``"1.5 g"`` yields one pair ``("1.5", "g")`` rather than two half-empty
    pairs. A lone ``"."`` is not treated as a number.

    Args:
        text: The raw cell content, e.g. ``"354g"`` or ``"0.04mg"``.

    Returns:
        A list of ``(value, unit)`` tuples of strings, in order of appearance.
        The list is empty when the text contains neither a number nor a unit.
    """
    # Group 1: "12", "12.", "12.5" or ".5" (optional, so unit-only cells match).
    # Group 2: the unit; findall reports a non-participating group as "".
    pattern = r"(\d+\.?\d*|\.\d+)?[ \t]*([a-zA-Z%]*)"
    # findall also reports empty matches between tokens; drop those.
    return [(value, unit) for value, unit in re.findall(pattern, text) if value or unit]

# NOTE(review): absolute, machine-specific path — consider deriving it from a
# configurable data directory so the notebook runs on other machines.
url = 'C:/Users/laure/OneDrive/Documents/GitHub/KG_project/Nutrition/Alcoholic_beverage_beer_light_low_carb_14013.csv'

# Use regex to extract the file name without its trailing "_<digits>.csv" part.
match = re.search(r'([^/]+)(?=_\d+\.csv$)', url)
# Always bind `result`, so later code never hits a NameError when the file
# name does not follow the "<name>_<digits>.csv" pattern.
result = match.group(1) if match else None
if match:
    print(result)

# newline='' lets the csv module do its own newline handling, as its
# documentation recommends for files passed to csv.reader.
with open(url, mode='r', newline='') as file:
    csvFile = csv.reader(file)
    # Skip the first row (header); the default avoids StopIteration on an empty file.
    next(csvFile, None)
    for lines in csvFile:
        # Columns 0-3 are read below; skip blank or truncated rows.
        if len(lines) < 4:
            continue

        # Presumably: column 0 = category, 1 = nutrient name, 2 = amount with
        # unit, 3 = daily percentage (inferred from the variable names) —
        # TODO confirm against the CSV header.
        measureUnity = separate_values_and_units(lines[2])

        # ex[...] builds a term from the cell text. The previous `ex.lines[0]`
        # looked up the term "lines" and then indexed its IRI string, which
        # yields a single character.
        # NOTE(review): cell text containing spaces produces IRIs that rdflib
        # warns about; consider normalising the names first.
        category_name = ex[lines[0]]
        value_name = ex[lines[1]]

        # The helper returns a list of (value, unit) pairs; use the first one.
        # TODO: decide whether a missing value should become 0; for now it is
        # left as an empty string.
        measure, measure_unit = measureUnity[0] if measureUnity else ("", "")

        # Kept as plain text: ex:dailyPercentage is declared as a datatype
        # property, so the value is meant to be wrapped in a Literal.
        daily_percentage = lines[3]

        # TODO: draft triples, not yet valid as written (the first one is not
        # a full triple and `measure_name` is undefined):
        # g.add((ex.name, Literal(result)))
        # g.add((value_name, RDF.type, ex.category_name))
        # g.add((value_name, ex.hasCategory, ex.category_name))
        # g.add((value_name, ex.name, Literal(value_name)))
        # g.add((value_name, ex.unit, Literal(measure_name)))
        # g.add((value_name, ex.value, Literal(measure_name)))
        # g.add((value_name, ex.dailyPercentage, Literal(daily_percentage)))

Alcoholic_beverage_beer_light_low_carb
[('354', 'g')]
[('96', '')]
[('0', 'g')]
[('0.6', 'g')]
[('2.6', 'g')]
[('0', 'g')]
[('0', 'g')]
[('0', 'mg')]
[('0', 'g')]
[('2.6', 'g')]
[('0', 'g')]
[('14.2', 'mg')]
[('0', 'mg')]
[('60.2', 'mg')]
[('14.2', 'mg')]
[('28.3', 'mg')]
[('10.6', 'mg')]
[('0.04', 'mg')]
[('0.01', 'mg')]
[('0.02', 'mg')]
[('0', 'mcg')]
[('', 'mcg')]
[('', 'mcg')]
[('', 'mcg')]
[('', 'mg')]
[('', 'mcg')]
[('0', 'mcg')]
[('0', 'mg')]
[('0', 'mg')]
[('0', 'mg')]
[('0', 'mg')]
[('', 'mg')]
[('0', 'mg')]
[('', 'mcg')]
[('0', 'mcg')]
[('0', 'mcg')]
[('0', 'mcg')]
[('0', 'mcg')]
[('0', 'mg')]
[('0', 'mcg')]
[('0', 'mcg')]
[('0', 'mcg')]
[('0', 'mcg')]
[('0', 'mcg')]
[('0', 'IU')]
[('0', 'mcg')]
[('0', 'mcg')]
[('0', 'mg')]
[('0', 'mcg')]
[('', 'mcg')]
[('', 'mcg')]
[('0', 'IU')]
[('0', 'mcg')]
[('', 'mcg')]
[('', 'mcg')]
[('337.7', 'g')]
[('0.21', 'g')]
[('11.7', 'g')]
[('0', 'mg')]
[('0', 'mg')]
[('0.48', '')]
[('', 'g')]
[('', 'g')]
[('', 'g')]
[('', 'g')]
[('0', 'g')]
[('