## Importation

In [2]:
!pip install rdflib
import pandas as pd
import csv

Collecting rdflib
  Downloading rdflib-6.0.2-py3-none-any.whl (407 kB)
[K     |████████████████████████████████| 407 kB 43.7 MB/s 
Installing collected packages: rdflib
Successfully installed rdflib-6.0.2


## Tableau des propriétés

@prefix schema: <http://schema.org/> 
@prefix foaf: <http://xmlns.com/foaf/0.1/>
@prefix wkd: <https://www.wikidata.org/wiki/Property:>
| Colonne  | Prédicat 
|-|-|
id | URI de base de notre noeud
legislatureLast | custom
civ | schema:gender  
nom | schema:familyName  
prenom  | schema:givenName  
naissance | schema:birthDate  
age | inclassable (déduit de birthDate)
groupe | schema:memberOf
groupeAbrev | wkd:P1813
departementNom | schema:addressRegion
departementCode | schema:addressRegion
circo | wkd:P768
datePriseFonction | schema:Date
job | schema:jobTitle
mail | schema:email // wkd:P968
twitter | wkd:P2002
facebook | wkd:P2013
website | wkd:P856
nombreMandats | custom
experienceDepute | custom
scoreParticipation | custom
scoreParticipationSpecialite | custom
scoreLoyaute | custom
scoreMajorite | custom
active | custom
dateMaj | osef  

  
Un nœud (blank ?) electedPlace de type Place, qui porte ensuite des propriétés sur le pays, la zone géographique, etc.  
Connexion vers un nœud group de type Organization / GovernmentOrganization avec les membres, le nom, l'abréviation et le site.


Député -> https://www.wikidata.org/wiki/Q1055894

Personne

Groupe

Localisation -> schema:AdministrativeArea

Vote  -> schema:Legislation || https://www.wikidata.org/wiki/Q686822

Votants

Followers -> https://www.wikidata.org/wiki/Property:P8687

| Colonne  | Prédicat 
|-|-|
civ | schema:gender  
nom | schema:familyName  
prenom  | schema:givenName  
naissance | schema:birthDate  
age | inclassable (déduit de birthDate)
job | schema:jobTitle

| Colonne  | Prédicat 
|-|-|
groupe | schema:memberOf // wkd:Q7278
groupeAbrev | wkd:P1813
departementNom | schema:addressRegion
departementCode | schema:addressRegion
circo | wkd:P768

| Colonne  | Prédicat 
|-|-|
mail | schema:email // wkd:P968
twitter | wkd:P2002
facebook | wkd:P2013
website | wkd:P856

| Colonne  | Prédicat 
|-|-|
ID | schema:identifier
datePriseFonction | wkd:P580
nombreMandats | custom
experienceDepute | custom
scoreParticipation | custom
scoreParticipationSpecialite | custom
scoreLoyaute | custom
scoreMajorite | custom
active | custom

In [None]:
# Draft list of predicate names; the trailing Ellipsis stands for the part
# that has not been written yet.
properties = [
    'schema: ',
    'schema',
    'schema:gender',
    'schema:firstName',
    ...,
]

## dataToRDF 
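- **entrées** : 
    - f : Le fichier csv d'origine.
    - pred : Les prédicats, un par colonne (par défaut, les noms des colonnes du csv).
    - ref : Le préfixe associé à chaque colonne ; "" si l'objet est une simple valeur littérale.
    - rdfType : Le type du sujet. 
    - mailDef : Le prédicat correspondant à l'adresse mail.
    - blankNodeId : L'index de la colonne servant d'identifiant au sujet.
    - multivalue : Le prédicat dont les valeurs multiples sont séparées par « ; ».
    - url : Le prédicat dont l'objet est écrit comme une URL.
    - nul : La valeur du csv à considérer comme manquante.
- **sortie**  : Une chaîne de caractères contenant les triplets RDF (syntaxe Turtle, sans les préfixes).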

In [32]:
from slugify import slugify

def dataToRDF(f, pred=[], ref=[], rdfType="schema:Person", mailDef= "schema:mail", blankNodeId= 0, multivalue="", url="schema:url", nul=""):
    """Serialise the rows of a CSV file as Turtle-style triples.

    Each row becomes one subject ``:<slug>``, where the slug is ``slugify``
    applied to the row's identifier cell. A ``rdf:type`` triple is emitted
    first (preceded by a blank line), then one triple per non-empty cell.

    Args:
        f: Path or file-like object handed to ``pandas.read_csv``.
        pred: Predicate for each column, by position. When empty, the CSV
            column names are used. Columns whose predicate is an empty
            string are skipped, since they cannot form a valid triple.
        ref: Prefix for each column, by position. ``""`` (or a missing
            entry) writes the cell as a quoted literal; any other value
            writes ``<prefix><slug of the cell>`` so that the object points
            at another resource.
        rdfType: Object of the ``rdf:type`` triple of every subject.
        mailDef: Predicate whose cells are written as ``<mailto:...>``.
        blankNodeId: Position (or column label) of the identifier cell.
        multivalue: Predicate whose cells hold several ``;``-separated
            values; each non-empty value gets its own quoted-literal triple.
        url: Predicate whose cells are written as ``<...>`` IRIs.
        nul: Extra missing-value marker passed to ``read_csv`` as ``na_values``.

    Returns:
        The triples joined by newlines. Prefix declarations are not included.
    """
    def _literal(value):
        # Escape what would otherwise end or corrupt a double-quoted Turtle string.
        text = str(value)
        for raw, escaped in (("\\", "\\\\"), ("\"", "\\\""), ("\n", "\\n"), ("\r", "\\r")):
            text = text.replace(raw, escaped)
        return "\"" + text + "\""

    df = pd.read_csv(f, na_values=nul)

    # Work on copies so the shared default lists are never touched.
    predicates = list(pred) if pred is not None and len(pred) else list(df.columns)
    prefixes = list(ref) if ref is not None else []
    # A short (or empty) ref list used to raise IndexError; a missing entry
    # now simply means "plain literal".
    prefixes += [""] * (len(predicates) - len(prefixes))

    lines = []
    for _, row in df.iterrows():
        # Positional access goes through .iloc: row[int] on a label-indexed
        # Series is deprecated in pandas.
        key = row.iloc[blankNodeId] if isinstance(blankNodeId, int) else row[blankNodeId]
        # str() first: slugify expects text and identifier columns may be numeric.
        subject = ":" + slugify(str(key))
        lines.append("\n" + subject + " rdf:type " + str(rdfType) + " .")

        # zip() stops at the shortest sequence, so surplus predicates or
        # surplus columns are ignored instead of raising.
        for predicate, prefix, value in zip(predicates, prefixes, row.tolist()):
            predicate = str(predicate)
            if not predicate or pd.isna(value):
                continue

            head = subject + " " + predicate + " "
            if multivalue and predicate == multivalue:
                # Multi-valued cells take precedence over every other rule and
                # are always written as literals, one triple per value.
                for part in str(value).split(";"):
                    part = part.strip()
                    if part:
                        lines.append(head + _literal(part) + " .")
            elif predicate == mailDef:
                lines.append(head + "<mailto:" + str(value) + "> .")
            elif predicate == url:
                lines.append(head + "<" + str(value) + "> .")
            elif not prefix:
                lines.append(head + _literal(value) + " .")
            else:
                lines.append(head + str(prefix) + slugify(str(value)) + " .")

    return "\n".join(lines)

def RDFToFile(RDF, nameFile = "default.ttl", prefix=[]):
    """Write a Turtle document: prefix declarations, a blank line, then the triples.

    Args:
        RDF: The triples, as one string (written unchanged).
        nameFile: Destination path. Missing parent folders are created.
        prefix: Prefix declarations without the ``@prefix`` keyword and the
            final dot, e.g. ``"schema: <http://schema.org/>"``.
    """
    import os  # local import so the function does not depend on another cell

    folder = os.path.dirname(nameFile)
    if folder:
        os.makedirs(folder, exist_ok=True)

    # Turtle documents are UTF-8; the platform default encoding would corrupt
    # accented characters on some systems.
    with open(nameFile, "w", encoding="utf-8") as text_file:
        text_file.writelines("@prefix " + str(p) + " .\n" for p in prefix)
        # Blank line between the prefixes and the triples.
        text_file.write("\n")
        text_file.write(RDF)


## Main

#### CSV Deputés

In [41]:
# RDF type given to every row of the CSV.
typeRDF = "schema:Person"
# One predicate per CSV column, in column order. Names follow the mapping
# table at the top of the notebook (schema:gender, schema:birthDate); the
# family name uses schema:familyName because schema:firstName is not a
# schema.org term.
properties = [
    "schema:identifier", "custom:legislatureLast", "schema:gender",
    "schema:familyName", "schema:givenName", "schema:birthDate", "custom:age",
    "schema:memberOf", "schema:addressRegion", "wkd:P768",
    "custom:startOccupation", "schema:jobTitle", "schema:email", "wkd:P2002",
    "wkd:P2013", "wkd:P856", "custom:nombreMandats", "custom:experienceDepute",
    "custom:scoreParticipation", "custom:scoreParticipationSpecialite",
    "custom:scoreLoyaute", "custom:scoreMajorite", "custom:active",
    "custom:dateMaj",
]
# Group and region cells point at other resources; every other column is a
# literal. Derived from `properties` so both lists always have the same length.
refs = ["group:" if p == "schema:memberOf" else "loc:" if p == "schema:addressRegion" else "" for p in properties]
bI = 0 # Index of the column used as the subject identifier
# The rdf namespace is http://www.w3.org/1999/02/22-rdf-syntax-ns# (not w3c.org).
pref = ["rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>","schema: <http://schema.org/>", "wkd: <https://www.wikidata.org/wiki/Property:>", "loc: <https://fuseki.dolr.es/loc/>", "group: <https://fuseki.dolr.es/group/>", "custom: <https://fuseki.dolr.es/ontology/>", ": <https://fuseki.dolr.es/depute/>"]
# mailDef must name the predicate actually used for the mail column,
# otherwise the address is never written as a mailto: IRI.
s = dataToRDF("CSV/deputes.csv", pred=properties, ref=refs, rdfType=typeRDF, mailDef="schema:email", blankNodeId=bI)
RDFToFile(s, nameFile = "ttl/depute.ttl", prefix = pref)

#### CSV Localisation

In [37]:
# tR: the RDF type of every row
tR = "schema:AdministrativeArea"
# One predicate per CSV column
p = ["schema:name", "schema:address"]

# Per-column prefix. "" means the cell is a plain literal; use a prefix such
# as "perso:" when the cell should link to a resource of another graph.
refs = ["",""]
# Index of the column used as the subject identifier
bI = 1 
# Prefixes used by the output file.
# The rdf namespace is http://www.w3.org/1999/02/22-rdf-syntax-ns# (not w3c.org).
# NOTE(review): the deputies cell declares loc: as <https://fuseki.dolr.es/loc/>,
# which differs from the base IRI below — confirm which one is intended.
pref = ["rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>","schema: <http://schema.org/>", ": <https://dolr.es/loc/>"]

s = dataToRDF("CSV/loc.csv", ref=refs, rdfType=tR, blankNodeId=bI, pred=p)
RDFToFile(s, nameFile = "ttl/loc.ttl", prefix = pref)

#### CSV Votes

In [None]:
# tR: the RDF type of every row
tR = "schema:Legislation"
# Candidate predicates. NOTE(review): this list is not passed to dataToRDF
# (the CSV column names are used as predicates instead) and it looks copied
# from the deputies mapping — confirm the intended mapping before using it.
properties = ["schema:identifier","","schema:gender","schema:firstName","schema:givenName","","","schema:memberOf","","","","","","schema:jobTitle","","","","","","","","","","","",""]
# Per-column prefix. "" means the cell is a plain literal; use a prefix such
# as "perso:" when the cell should link to a resource of another graph.
refs = ["","","","","","","","","","","","","","","","","","","","","","","","","",""]
# Index of the column used as the subject identifier
bI = 0
# Prefixes used by the output file.
# The rdf namespace is http://www.w3.org/1999/02/22-rdf-syntax-ns# (not w3c.org).
pref = ["rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>","schema: <http://schema.org/>", "loc: <http://LeNotre/loc>", ": <https://dolr.es/votes/>"]
# tR and bI were defined but never passed, so every vote was typed with the
# function's default, schema:Person.
s = dataToRDF("CSV/votes.csv", ref=refs, rdfType=tR, blankNodeId=bI)
RDFToFile(s, nameFile = "votes.ttl", prefix = pref)

KeyboardInterrupt: 

#### CSV Groupes

In [31]:
# tR: the RDF type of every row
tR = "schema:Organization"
# One predicate per CSV column
p = ["schema:identifier", "schema:name", "schema:alternateName", "schema:foundingDate", "schema:dissolutionDate", "schema:url", "schema:ethicsPolicy", "schema:ethicsPolicy"]

# Per-column prefix. "" means the cell is a plain literal; use a prefix such
# as "perso:" when the cell should link to a resource of another graph.
# Built from `p` so both lists always have the same length.
refs = [""] * len(p)
# Index of the column used as the subject identifier
bI = 1
# Prefixes used by the output file.
# The rdf namespace is http://www.w3.org/1999/02/22-rdf-syntax-ns# (not w3c.org).
# NOTE(review): the deputies cell declares group: as <https://fuseki.dolr.es/group/>,
# which differs from the base IRI below — confirm which one is intended.
pref = ["rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>","schema: <http://schema.org/>", ": <https://dolr.es/groups/>"]

s = dataToRDF("CSV/groupes.csv", ref=refs, rdfType=tR, blankNodeId=bI, pred=p, multivalue="schema:ethicsPolicy", nul="-")
RDFToFile(s, nameFile = "ttl/groupes.ttl", prefix = pref)

In [None]:
!pip3 uninstall slugify
!pip3 install python-slugify



Deputés

In [None]:
# Builds one :ParliamentMember resource per row of deputes-historique.csv.
# ?id, ?civ, ?nom, ?mail, ... are never bound in WHERE; presumably the engine
# binds them from the CSV column headers (Tarql-style) — TODO confirm.
PREFIX schema: <https://schema.org/>
PREFIX wkd: <https://www.wikidata.org/wiki/Property:>
PREFIX : <https://dolr.es/>

CONSTRUCT {
  ?parliamentMemberId a :ParliamentMember;
    schema:gender ?civ;
    # schema.org has givenName / familyName (there is no schema:firstName).
    # NOTE(review): ?prenom_low is not bound anywhere in this query — confirm
    # it is a CSV column, otherwise this triple is never produced.
    schema:givenName ?prenom_low;
    schema:familyName ?nom;
    schema:birthDate ?naissance;
    schema:jobTitle ?job;

    # schema:Organization is a class, not a property: link with schema:memberOf.
    schema:memberOf ?organizationId;
    
    # schema:AdministrativeArea is a class, not a property: link with
    # schema:addressRegion, as the Python conversion of deputes.csv does.
    schema:addressRegion ?locId;
    wkd:P768 ?circo;

    #wkd:P580 ?datePriseFonction;
    #:nombreMandats ?nombreMandats;
    #:experienceDepute ?experienceDepute;
    #:scoreParticipation ?scoreParticipation;
    #:scoreParticipationSpecialite ?scoreParticipationSpecialite;
    #:scoreLoyaute ?scoreLoyaute;
    #:scoreMajorite ?scoreMajorite;
    #:active ?active;

    schema:email ?email;
    schema:url ?website;
    wkd:P2002 ?twitterUsername;
    wkd:P2013 ?facebook;
    
} 
FROM <file:deputes-historique.csv>
WHERE {
  BIND (URI(CONCAT('https://dolr.es/', ?id)) AS ?parliamentMemberId)
  BIND (CONCAT('mailto:', ?mail) AS ?email)
  BIND (REPLACE(?twitter, '@', '') AS ?twitterUsername)

  # Hand-rolled slug of the group name: strip a few accents, turn spaces into
  # dashes, drop commas, lower-case.
  BIND(REPLACE(?groupe,'é','e') AS ?groupe1)
  BIND(REPLACE(?groupe1,'à','a') AS ?groupe2)
  BIND(REPLACE(?groupe2,'ç','c') AS ?groupe3)
  BIND(REPLACE(?groupe3,' ','-') AS ?groupe4)
  BIND(REPLACE(?groupe4,',','') AS ?groupe5)
  BIND(LCASE(?groupe5) AS ?groupe6)
  BIND (URI(CONCAT('https://dolr.es/groups/', ?groupe6)) AS ?organizationId)
  BIND (URI(CONCAT('https://dolr.es/locations/', ?departementCode)) AS ?locId)
}

SyntaxError: invalid syntax (1404801722.py, line 1)

Groupes

In [None]:
# Builds one schema:Organization resource per row of groupes.csv.
# ?id, ?nom, ?abreviation, ... are never bound in WHERE; presumably the engine
# binds them from the CSV column headers (Tarql-style) — TODO confirm.
PREFIX schema: <https://schema.org/>

CONSTRUCT {
  ?organizationId a schema:Organization;
    schema:identifier ?id;
    schema:name ?nom;
    schema:alternateName ?abreviation;
    schema:foundingDate ?anneeCreation;
    schema:url ?url;
    schema:ethicsPolicy ?valeurs;
} 
FROM <file:groupes.csv>
WHERE {
  # Hand-rolled slug of the group name: replace é/à/ç with e/a/c, turn spaces
  # into dashes, drop commas, lower-case. The result is appended to
  # https://dolr.es/groups/ to form the resource IRI.
  BIND(REPLACE(?nom,'é','e') AS ?nom1)
  BIND(REPLACE(?nom1,'à','a') AS ?nom2)
  BIND(REPLACE(?nom2,'ç','c') AS ?nom3)
  BIND(REPLACE(?nom3,' ','-') AS ?nom4)
  BIND(REPLACE(?nom4,',','') AS ?nom5)
  BIND(LCASE(?nom5) AS ?nom6)
  BIND (URI(CONCAT('https://dolr.es/groups/', ?nom6)) AS ?organizationId)

  # Disabled attempt at splitting ?valeurs on ';'; as written, ?valeurs is
  # emitted as a single value.
  # ?ethics apf:strSplit (?valeurs ';')
  # BIND(?ethics AS ?ethic)
  # filter(BOUND(?ethic))
}

Locs

In [None]:
# Builds one schema:AdministrativeArea resource per row of loc.csv.
# ?departementNom and ?departementCode are never bound in WHERE; presumably
# the engine binds them from the CSV column headers (Tarql-style) — TODO confirm.
PREFIX schema: <https://schema.org/>

CONSTRUCT {
  ?locId a schema:AdministrativeArea;
    schema:name ?departementNom;
    schema:address ?departementCode;
} 
FROM <file:loc.csv>
WHERE {
  # The resource IRI is https://dolr.es/locations/ followed by ?departementCode.
  BIND (URI(CONCAT('https://dolr.es/locations/', ?departementCode)) AS ?locId)
}

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=31db4137-da76-438e-92fb-6bb1432c3c77' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>