In [1]:
# Installations
!pip install rdflib

You should consider upgrading via the '/root/venv/bin/python -m pip install --upgrade pip' command.

In [2]:
# Imports
import pandas as pd
import numpy as np
import rdflib
from rdflib import Namespace, Literal, URIRef
from decimal import Decimal

In [3]:
# Load csv-files
# All paths are relative to the notebook's working directory.
courses              = pd.read_csv("Courses.csv")
course_instances     = pd.read_csv("Course_Instances.csv")
course_plannings     = pd.read_csv("Course_plannings.csv")
programs             = pd.read_csv("Programmes.csv")
program_courses      = pd.read_csv("Programme_Courses.csv")
students             = pd.read_csv("Students.csv")
course_registrations = pd.read_csv("Registrations.csv")
teachers             = pd.read_csv("Senior_Teachers.csv")
teaching_assistants  = pd.read_csv("Teaching_Assistants.csv")
assigned_hours       = pd.read_csv("Assigned_Hours.csv")
reported_hours       = pd.read_csv("Reported_Hours.csv")

# Extend existing dataframes
# courses          = pd.merge(courses, programs, left_on="Owned By", right_on="Programme name")
# Default (inner) join: only instances that have a planning row are kept.
course_instances = pd.merge(course_instances, course_plannings, left_on="Instance_id", right_on="Course")

# Define missing dataframes
# Employees are the union of senior teachers and teaching assistants.
employees        = pd.concat([teachers, teaching_assistants], ignore_index=True)
departments      = courses[["Department"]].drop_duplicates()
# Divisions are read from the senior-teacher sheet only; de-duplicated so there is
# one row per (department, division) pair instead of one per teacher.
divisions        = teachers[["Department name", "Division name"]].drop_duplicates()
# Inner join: keeps only (teacher, course instance) pairs present in both sheets.
# Columns appearing in both inputs get the _x (assigned) / _y (reported) suffixes.
time_reports     = pd.merge(assigned_hours, reported_hours, left_on=["Teacher Id", "Course Instance"], right_on=["Teacher Id", "Course code"], how="inner")

# Drop unnecessary columns
# Reassignment instead of inplace=True: each name is rebound to a new frame.
courses             = courses.drop(columns=["Department"])
course_instances    = course_instances.drop(columns=["Course"])
employees           = employees.drop(columns=["Department name"])
teachers            = teachers.drop(columns=["Teacher name", "Department name", "Division name"])
teaching_assistants = teaching_assistants.drop(columns=["Teacher name", "Department name", "Division name"])
time_reports        = time_reports.drop(columns=["Course code_x", "Course code_y", "Study Period", "Academic Year"])

In [4]:
# Rename columns
# Naming convention relied on by the triple-building cells:
#   lowerCamelCase -> value is stored as a literal
#   UpperCamelCase -> value is turned into a link to another node
courses = courses.rename(
    columns={
        "Course name": "courseName",
        "Course code": "courseCode",
        "Credits": "credits",
        "Level": "level",
        "Division": "GivenBy",
        "Owned By": "OwnedBy",
    }
)

course_instances = course_instances.rename(
    columns={
        "Course code": "InstanceOf",
        "Study period": "studyPeriod",
        "Academic year": "instanceYear",
        "Instance_id": "instanceId",
        "Examiner": "ExaminedBy",
        "Planned number of Students": "numStudents",
        "Senior Hours": "teacherHours",
        "Assistant Hours": "assistantHours",
    }
)

programs = programs.rename(
    columns={
        "Programme name": "programName",
        "Programme code": "programCode",
        "Director": "DirectedBy",
        "Department name": "BelongsTo",
    }
)

program_courses = program_courses.rename(
    columns={
        "Programme code": "PartOf",
        "Study Year": "studyYear",
        "Academic Year": "academicYear",
        "Course": "MapsTo",
        "Course Type": "courseType",
    }
)

students = students.rename(
    columns={
        "Student name": "studentName",
        "Student id": "studentId",
        "Programme": "EnrolledTo",
        "Year": "enrollmentYear",
        "Graduated": "graduated",
    }
)

course_registrations = course_registrations.rename(
    columns={
        "Course Instance": "RegistrationFor",
        "Student id": "Includes",
        "Status": "status",
        "Grade": "grade",
    }
)

employees = employees.rename(
    columns={
        "Teacher name": "employeeName",
        "Teacher id": "employeeId",
        "Division name": "EmployedBy",
    }
)

teachers = teachers.rename(columns={"Teacher id": "teacherId"})

# "teachinAssistantId" (sic): the teaching-assistant cell reads the column under
# exactly this spelling, so it must not be corrected here alone.
teaching_assistants = teaching_assistants.rename(columns={"Teacher id": "teachinAssistantId"})

departments = departments.rename(columns={"Department": "departmentName"})

divisions = divisions.rename(
    columns={
        "Department name": "DivisionOf",
        "Division name": "divisionName",
    }
)

# Hours_x / Hours_y are the suffixed columns produced by the assigned/reported merge.
time_reports = time_reports.rename(
    columns={
        "Teacher Id": "HoursByEmployee",
        "Course Instance": "HoursDoneIn",
        "Hours_x": "assignedHours",
        "Hours_y": "reportedHours",
    }
)

In [5]:
# Create an RDF graph
g = rdflib.Graph()

# Define namespaces
# Every entity type gets its own sub-path below the ontology IRI.
ONTOLOGY_IRI = "http://www.semanticweb.org/marti/ontologies/2023/3/Assignment_2/"

base                = Namespace(ONTOLOGY_IRI)
course              = Namespace(ONTOLOGY_IRI + "Course/")
course_instance     = Namespace(ONTOLOGY_IRI + "CourseInstance/")
program             = Namespace(ONTOLOGY_IRI + "Program/")
program_course      = Namespace(ONTOLOGY_IRI + "ProgramCourse/")
student             = Namespace(ONTOLOGY_IRI + "Student/")
course_registration = Namespace(ONTOLOGY_IRI + "CourseRegistration/")
employee            = Namespace(ONTOLOGY_IRI + "Employee/")
teacher             = Namespace(ONTOLOGY_IRI + "Teacher/")
teaching_assistant  = Namespace(ONTOLOGY_IRI + "TeachingAssistant/")
department          = Namespace(ONTOLOGY_IRI + "Department/")
division            = Namespace(ONTOLOGY_IRI + "Division/")
time_report         = Namespace(ONTOLOGY_IRI + "TimeReport/")

# Bind namespace prefixes to graph
# The empty prefix is bound to the ontology's own namespace.
_prefix_bindings = [
    ("",                   base),
    ("course",             course),
    ("courseInstance",     course_instance),
    ("program",            program),
    ("programCourse",      program_course),
    ("student",            student),
    ("courseRegistration", course_registration),
    ("employee",           employee),
    ("teacher",            teacher),
    ("teachingAssistant",  teaching_assistant),
    ("department",         department),
    ("division",           division),
    ("timeReport",         time_report),
]
for _prefix, _namespace in _prefix_bindings:
    g.namespace_manager.bind(_prefix, _namespace)

In [6]:
# Add triples for courses
# One node per row, identified by its course code.
for _, row in courses.iterrows():
    course_uri = rdflib.URIRef(course + str(row["courseCode"]))

    g.add((course_uri, rdflib.RDF.type, base.Course))
    g.add((course_uri, base.courseCode, Literal(row["courseCode"])))
    g.add((course_uri, base.courseName, Literal(row["courseName"])))
    # Via Decimal(str(...)) so the literal carries the value as printed, not a binary float.
    g.add((course_uri, base.credits,    Literal(Decimal(str(row["credits"])))))
    g.add((course_uri, base.level,      Literal(row["level"])))
    # str() on the key, exactly as the division nodes are minted, so the link
    # resolves to the same URI whatever dtype pandas inferred for the column.
    g.add((course_uri, base.GivenBy,    division[str(row["GivenBy"])]))
    # NOTE(review): the commented-out merge in the load cell joins "Owned By" on
    # "Programme name". Program nodes are minted from programCode, so if this column
    # holds names rather than codes the link will not resolve - confirm against the data.
    g.add((course_uri, base.OwnedBy,    program[str(row["OwnedBy"])]))

In [7]:
# Add triples for course instances
# One node per row, identified by its instance id; planning figures come from
# the columns merged in from the course-planning sheet.
for _, row in course_instances.iterrows():
    course_instance_uri = rdflib.URIRef(course_instance + str(row["instanceId"]))

    g.add((course_instance_uri, rdflib.RDF.type,     base.CourseInstance))
    g.add((course_instance_uri, base.InstanceOf,     course[str(row["InstanceOf"])]))
    # str() on the key, exactly as the teacher nodes are minted, so the link
    # resolves to the same URI whatever dtype pandas inferred for the column.
    g.add((course_instance_uri, base.ExaminedBy,     teacher[str(row["ExaminedBy"])]))
    g.add((course_instance_uri, base.instanceId,     Literal(row["instanceId"])))
    g.add((course_instance_uri, base.instanceYear,   Literal(row["instanceYear"])))
    # int() so these counts are stored as integer literals.
    g.add((course_instance_uri, base.studyPeriod,    Literal(int(row["studyPeriod"]))))
    g.add((course_instance_uri, base.numStudents,    Literal(int(row["numStudents"]))))
    g.add((course_instance_uri, base.teacherHours,   Literal(int(row["teacherHours"]))))
    g.add((course_instance_uri, base.assistantHours, Literal(int(row["assistantHours"]))))

In [8]:
# Add triples for programs
# One node per row, identified by its programme code.
for _, row in programs.iterrows():
    program_uri = rdflib.URIRef(program + str(row["programCode"]))

    g.add((program_uri, rdflib.RDF.type,  base.Program))
    g.add((program_uri, base.programCode, Literal(row["programCode"])))
    g.add((program_uri, base.programName, Literal(row["programName"])))
    # str() on the keys, exactly as the teacher and department nodes are minted,
    # so both links resolve to the same URIs whatever dtype pandas inferred.
    g.add((program_uri, base.DirectedBy,  teacher[str(row["DirectedBy"])]))
    g.add((program_uri, base.BelongsTo,   department[str(row["BelongsTo"])]))

In [9]:
# Add triples for program courses
for index, row in program_courses.iterrows():
    # The dataframe row index is used as the node identifier.
    program_course_uri = program_course[str(index)]

    # (predicate, object) pairs describing this programme/course mapping
    program_course_facts = (
        (rdflib.RDF.type,   base.ProgramCourse),
        (base.PartOf,       program[str(row["PartOf"])]),
        (base.studyYear,    Literal(int(row["studyYear"]))),
        (base.academicYear, Literal(row["academicYear"])),
        (base.MapsTo,       course[str(row["MapsTo"])]),
        (base.courseType,   Literal(row["courseType"])),
    )
    for predicate, value in program_course_facts:
        g.add((program_course_uri, predicate, value))

In [10]:
# Add triples for students
for _, row in students.iterrows():
    # Each student node is identified by the student id.
    student_uri = student[str(row["studentId"])]

    # (predicate, object) pairs describing this student
    student_facts = (
        (rdflib.RDF.type,     base.Student),
        (base.studentId,      Literal(row["studentId"])),
        (base.studentName,    Literal(row["studentName"])),
        (base.EnrolledTo,     program[str(row["EnrolledTo"])]),
        (base.enrollmentYear, Literal(int(row["enrollmentYear"]))),
        (base.graduated,      Literal(row["graduated"])),
    )
    for predicate, value in student_facts:
        g.add((student_uri, predicate, value))

In [11]:
# Add triples for course registrations
# The dataframe row index is used as the node identifier.
for index, row in course_registrations.iterrows():
    course_registration_uri = rdflib.URIRef(course_registration + str(index))

    # A missing grade is recorded as "F". pd.isna is used instead of np.isnan
    # because it also accepts non-float values (e.g. letter grades), for which
    # np.isnan raises TypeError.
    grade_literal = Literal("F") if pd.isna(row["grade"]) else Literal(str(row["grade"]))

    g.add((course_registration_uri, rdflib.RDF.type,      base.CourseRegistration))
    # str() on the keys, exactly as the course-instance and student nodes are minted,
    # so both links resolve to the same URIs whatever dtype pandas inferred.
    g.add((course_registration_uri, base.RegistrationFor, course_instance[str(row["RegistrationFor"])]))
    g.add((course_registration_uri, base.Includes,        student[str(row["Includes"])]))
    g.add((course_registration_uri, base.status,          Literal(row["status"])))
    g.add((course_registration_uri, base.grade,           grade_literal))


In [12]:
# Add triples for employees
# employees is the concatenation of senior teachers and teaching assistants;
# one node per row, identified by the employee id.
for _, row in employees.iterrows():
    employee_uri = rdflib.URIRef(employee + str(row["employeeId"]))

    g.add((employee_uri, rdflib.RDF.type,   base.Employee))
    g.add((employee_uri, base.employeeId,   Literal(row["employeeId"])))
    g.add((employee_uri, base.employeeName, Literal(row["employeeName"])))
    # str() on the key, exactly as the division nodes are minted, so the link
    # resolves to the same URI whatever dtype pandas inferred for the column.
    g.add((employee_uri, base.EmployedBy,   division[str(row["EmployedBy"])]))

In [13]:
# Add triples for teachers
for _, row in teachers.iterrows():
    # Each teacher node is identified by the teacher id.
    teacher_uri = teacher[str(row["teacherId"])]

    # (predicate, object) pairs describing this teacher
    teacher_facts = (
        (rdflib.RDF.type, base.Teacher),
        (base.teacherId,  Literal(row["teacherId"])),
    )
    for predicate, value in teacher_facts:
        g.add((teacher_uri, predicate, value))

In [14]:
# Add triples for teaching assistants
for _, row in teaching_assistants.iterrows():
    # The id column is named "teachinAssistantId" (sic) by the rename cell.
    teaching_assistant_uri = teaching_assistant[str(row["teachinAssistantId"])]

    # (predicate, object) pairs describing this teaching assistant
    teaching_assistant_facts = (
        (rdflib.RDF.type,          base.TeachingAssistant),
        (base.teachingAssistantId, Literal(row["teachinAssistantId"])),
    )
    for predicate, value in teaching_assistant_facts:
        g.add((teaching_assistant_uri, predicate, value))

In [15]:
# Add triples for departments
for _, row in departments.iterrows():
    # Each department node is identified by the department name.
    department_uri = department[str(row["departmentName"])]

    # (predicate, object) pairs describing this department
    department_facts = (
        (rdflib.RDF.type,     base.Department),
        (base.departmentName, Literal(row["departmentName"])),
    )
    for predicate, value in department_facts:
        g.add((department_uri, predicate, value))

In [16]:
# Add triples for divisions
# One node per row, identified by the division name.
for _, row in divisions.iterrows():
    division_uri = rdflib.URIRef(division + str(row["divisionName"]))

    g.add((division_uri, rdflib.RDF.type,   base.Division))
    g.add((division_uri, base.divisionName, Literal(row["divisionName"])))
    # str() on the key, exactly as the department nodes are minted, so the link
    # resolves to the same URI whatever dtype pandas inferred for the column.
    g.add((division_uri, base.DivisionOf,   department[str(row["DivisionOf"])]))

In [17]:
# Add triples for time reports
# One node per merged (assigned, reported) row; the dataframe row index is
# used as the node identifier.
for index, row in time_reports.iterrows():
    time_report_uri = rdflib.URIRef(time_report + str(index))

    g.add((time_report_uri, rdflib.RDF.type,      base.TimeReport))
    # str() on the keys, exactly as the employee and course-instance nodes are minted,
    # so both links resolve to the same URIs whatever dtype pandas inferred.
    g.add((time_report_uri, base.HoursByEmployee, employee[str(row["HoursByEmployee"])]))
    g.add((time_report_uri, base.HoursDoneIn,     course_instance[str(row["HoursDoneIn"])]))
    # Parsed through float first, then int() truncates to a whole number of hours.
    g.add((time_report_uri, base.assignedHours,   Literal(int(float(str(row["assignedHours"]))))))
    g.add((time_report_uri, base.reportedHours,   Literal(int(float(str(row["reportedHours"]))))))

In [18]:
# Generate file with the triples
# Serialize before opening the file: opening with "wb" truncates it, so doing the
# serialization first keeps an existing Triples.ttl intact if serialization fails.
turtle_text = g.serialize(format="turtle", skip_relative_uri_check=True)
with open("Triples.ttl", "wb") as turtlefile:
    turtlefile.write(turtle_text.encode('utf-8'))

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=783a6b23-d952-496f-afd0-255b4b141320' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>