Initialisation

In [89]:
# required libraries
import pandas as pd
import os
from pathlib import Path
# CHECK DATE 
import datetime
# parameters and URLs

# All input and output files are resolved against the directory the notebook
# is executed from; these three names are reused by the cells below.
path = str(Path.cwd())
csvurl = f"{path}/shopping_trends_updated.csv"
owl = f"{path}/cosumer_habits_ontology.rdf"

# Load the required libraries
from rdflib import Graph, Literal, RDF, URIRef, Namespace
# rdflib knows about some namespaces, like FOAF
from rdflib.namespace import FOAF, XSD
# Namespace used to build the URIs of the classes, properties and individuals
# added to the graph in this notebook.
# NOTE(review): presumably this matches the base IRI of the ontology file loaded
# below - confirm against the .rdf file.
TR = Namespace("http://www.semanticweb.org/aritz/ontologies/2023/10/costumerHabits#")



Importation of the CSV file

In [90]:
# Read the shopping-trends CSV into a DataFrame indexed by 'Customer ID'.
trends = pd.read_csv(
    csvurl,
    sep=',',
    index_col='Customer ID',
)

Creation of the graph

In [91]:
# Start from an empty graph, load the ontology (RDF/XML) into it, then register
# the "TR" prefix for the namespace used by the individuals created below.
g = Graph()
g.parse(source=owl, format="xml")
g.bind(prefix="TR", namespace=TR)

Extracting the distinct seasons and payment methods

In [92]:
# Distinct values of the two columns that are modelled as shared individuals
# (one Season / Payment node per value) rather than as per-row literals.
unique_season, unique_payment = (
    trends[column].unique() for column in ('Season', 'Payment Method')
)

Filling the Graph

In [93]:

# --- Shared individuals -----------------------------------------------------
# One Season individual per distinct season value. The value -> URI mapping is
# kept so each purchase can be linked with a dict lookup instead of rescanning
# the array for every row.
season_uris = {}
for i, season in enumerate(unique_season):
    Season_uri = URIRef(TR + "Season" + str(i))
    g.add((Season_uri, RDF.type, TR.Season))
    g.add((Season_uri, TR['Season'], Literal(season, datatype=XSD.string)))
    # NaN never compares equal to itself, so a missing value must not become a
    # lookup key (rows with a missing season simply get no season link).
    if pd.notna(season):
        season_uris[season] = Season_uri

# One Payment individual per distinct payment method, indexed the same way.
payment_uris = {}
for i, payment in enumerate(unique_payment):
    Payment_uri = URIRef(TR + "Payment" + str(i))
    g.add((Payment_uri, RDF.type, TR.Payment))
    g.add((Payment_uri, TR['Payment_Method'], Literal(payment, datatype=XSD.string)))
    if pd.notna(payment):
        payment_uris[payment] = Payment_uri

# --- Per-row individuals ----------------------------------------------------
# Each CSV row produces one Customer, Habits, Item and Price individual, all
# suffixed with the row's 'Customer ID' (the DataFrame index).
for index, row in trends.iterrows():
    # Customer
    client_uri = URIRef(TR + "Client" + str(index))
    g.add((client_uri, RDF.type, TR.Customer))
    g.add((client_uri, TR['Age'], Literal(row['Age'], datatype=XSD.int)))
    g.add((client_uri, TR['Gender'], Literal(row['Gender'], datatype=XSD.string)))
    g.add((client_uri, TR['Review_Rating'], Literal(row['Review Rating'], datatype=XSD.string)))
    g.add((client_uri, TR['Discount_Applied'], Literal(row['Discount Applied'], datatype=XSD.string)))

    # Habits
    habits_uri = URIRef(TR + "Habits" + str(index))
    g.add((habits_uri, RDF.type, TR.Habits))
    g.add((habits_uri, TR['Previous_Purchases'], Literal(row['Previous Purchases'], datatype=XSD.int)))
    g.add((habits_uri, TR['Frequency_of_Purchases'], Literal(row['Frequency of Purchases'], datatype=XSD.string)))
    g.add((habits_uri, TR['Subscription_Status'], Literal(row['Subscription Status'], datatype=XSD.string)))

    # Item
    item_uri = URIRef(TR + "Item" + str(index))
    g.add((item_uri, RDF.type, TR.Item))
    g.add((item_uri, TR['Item'], Literal(row['Item Purchased'], datatype=XSD.string)))
    g.add((item_uri, TR['Category'], Literal(row['Category'], datatype=XSD.string)))
    g.add((item_uri, TR['Size'], Literal(row['Size'], datatype=XSD.string)))
    g.add((item_uri, TR['Color'], Literal(row['Color'], datatype=XSD.string)))
    g.add((item_uri, TR['Location'], Literal(row['Location'], datatype=XSD.string)))
    g.add((item_uri, TR['Shipping_Method'], Literal(row['Shipping Type'], datatype=XSD.string)))
    g.add((item_uri, TR['Promo_Code_Used'], Literal(row['Promo Code Used'], datatype=XSD.string)))

    # Price
    Price_uri = URIRef(TR + "Price" + str(index))
    g.add((Price_uri, RDF.type, TR.Price))
    g.add((Price_uri, TR['Price'], Literal(row['Purchase Amount (USD)'], datatype=XSD.float)))

    # Relations between the individuals of this row
    g.add((client_uri, TR['hasBought'], item_uri))    # Customer -- Item
    g.add((item_uri, TR['hasCosted'], Price_uri))     # Item -- Price
    g.add((client_uri, TR['hasHabits'], habits_uri))  # Customer -- Habits
    g.add((client_uri, TR['payed'], Price_uri))       # Customer -- Price

    # Price -- Payment and Habits -- Payment.
    # The Habits link previously reused the loop variable left over from the
    # Payment loop above, so every customer pointed at the *last* payment
    # method. It now points at the payment method recorded on this row.
    # NOTE(review): 'Payment Method' is the only payment column read in this
    # notebook, so it is used as the preferred method - confirm this is the
    # intended source for 'preferedMethod'.
    row_payment_uri = payment_uris.get(row['Payment Method'])
    if row_payment_uri is not None:
        g.add((Price_uri, TR['payedBy'], row_payment_uri))
        g.add((habits_uri, TR['preferedMethod'], row_payment_uri))

    # Item -- Season
    row_season_uri = season_uris.get(row['Season'])
    if row_season_uri is not None:
        g.add((item_uri, TR['payedWhen'], row_season_uri))



Serialization

In [94]:

print("--- saving serialization ---")
# Write the graph as Turtle next to the notebook. The encoding is pinned to
# UTF-8 so the output does not depend on the platform's default locale
# encoding (which would break or corrupt non-ASCII literals on some systems).
with open(path + '/Trends_file_already_created.ttl', 'w', encoding='utf-8') as file:
    file.write(g.serialize(format='turtle'))

--- saving serialization ---
