In [115]:
import pandas as pd
import numpy as np
import re
pd.set_option('display.max_rows', 200)
pd.set_option('display.max_colwidth', None)

In [2]:
mmds_course_metadata = pd.read_csv('./MMDS_Courses.csv')
mmds_course_metadata.head()

Unnamed: 0,code,Name of Module,Offered,Language,ECTS,moduleCategory
0,CS 450,Programming Course,HWS,E,6,Fundamentals
1,CS 460,Database Technology,FSS,E,6,Fundamentals
2,CS 470,Python for Data Scientists,FSS,E,6,Fundamentals
3,,Multivariate Analyses,HWS,E,6,Fundamentals
4,,Tutorial Multivariate Analyses,HWS,E,2,Fundamentals


In [80]:
mmds_course_metadata[mmds_course_metadata.code.isna()]

Unnamed: 0,code,Name of Module,Offered,Language,ECTS,moduleCategory
3,,Multivariate Analyses,HWS,E,6,Fundamentals
4,,Tutorial Multivariate Analyses,HWS,E,2,Fundamentals
5,,Empirische Methoden der Politik- wissenschaft,HWS,G/E,6,Fundamentals
46,,Legal and Ethical Aspects of Privacy,HWS,E,3,Responsible Data Science


In [33]:
# Count the courses in each module category: the category label ends up in
# the `module` column and its frequency in the `counts` column.
mmds_modules = (
    mmds_course_metadata['moduleCategory']
    .value_counts()
    .rename_axis('module')
    .reset_index(name='counts')
)
mmds_modules

Unnamed: 0,module,counts
0,Data Analytics,24
1,Data Management,15
2,Fundamentals,6
3,Responsible Data Science,3


### Creating URIs for Course Codes and Module Categories

#### Programs : https://www.wim.uni-mannheim.de/en/academics/programs/

        - Base URL : https://www.uni-mannheim.de/en/academics/
        - MMDS : https://www.uni-mannheim.de/en/academics/programs/mannheim-master-in-data-science/
        - BI   : https://www.uni-mannheim.de/en/academics/programs/masters-program-in-business-informatics/
        
        - Courses : https://www.uni-mannheim.de/en/academics/courses/

In [214]:
# libraries for creating name spaces
import rdflib
from urllib.parse import quote
from rdflib import Graph, URIRef, Literal, BNode, Namespace
from rdflib.namespace import FOAF, RDF, OWL, RDFS, XSD

In [192]:
# Namespaces for every kind of resource in the graph, all rooted at the
# "academics" base URL.
baseURL = Namespace('https://www.uni-mannheim.de/en/academics/')
mmdsURL = Namespace(baseURL + 'programs/mannheim-master-in-data-science/')
# Modules live directly underneath the MMDS program URL.
mmdsModuleURL = Namespace(mmdsURL + 'modules/')
courseURL, propertyURL, professorURL = (
    Namespace(baseURL + segment) for segment in ('courses/', 'property/', 'professor/')
)
def create_uri(name:str,baseURL):
    """Build a URI for `name` inside the namespace `baseURL`.

    Every space in `name` is turned into an underscore, the result is
    percent-encoded with `urllib.parse.quote`, and the encoded local name is
    looked up on the namespace through item access (`baseURL[...]`).
    """
    local_name = "_".join(name.split(" "))
    return baseURL[quote(local_name)]

In [44]:
create_uri('',mmdsURL)

rdflib.term.URIRef('https://www.uni-mannheim.de/en/academics/programs/mannheim-master-in-data-science/')

## MMDS Module URIs

In [45]:
# Mint one URI per module category inside the MMDS module namespace.
mmds_modules['moduleURI'] = mmds_modules['module'].map(
    lambda category: create_uri(category, mmdsModuleURL)
)
mmds_modules

Unnamed: 0,module,counts,moduleURI
0,Data Analytics,24,https://www.uni-mannheim.de/en/academics/progr...
1,Data Management,15,https://www.uni-mannheim.de/en/academics/progr...
2,Fundamentals,6,https://www.uni-mannheim.de/en/academics/progr...
3,Responsible Data Science,3,https://www.uni-mannheim.de/en/academics/progr...


In [46]:
#example
print(mmds_modules.moduleURI[0])

https://www.uni-mannheim.de/en/academics/programs/mannheim-master-in-data-science/modules/Data_Analytics


## MMDS Course URIs

In [82]:
mmds_course_metadata[mmds_course_metadata.code.isna()]

Unnamed: 0,code,Name of Module,Offered,Language,ECTS,moduleCategory
3,,Multivariate Analyses,HWS,E,6,Fundamentals
4,,Tutorial Multivariate Analyses,HWS,E,2,Fundamentals
5,,Empirische Methoden der Politik- wissenschaft,HWS,G/E,6,Fundamentals
46,,Legal and Ethical Aspects of Privacy,HWS,E,3,Responsible Data Science


In [87]:
# Keep only the rows that have a course code; the URI local name is built
# from the lower-cased code and module name joined by a space.
mmds_courses = mmds_course_metadata.loc[mmds_course_metadata['code'].notna()].copy()
mmds_courses['courseURI'] = (
    mmds_courses['code'].str.lower() + ' ' + mmds_courses['Name of Module'].str.lower()
).apply(lambda label: create_uri(label, courseURL))
print(mmds_courses['courseURI'][0])

https://www.uni-mannheim.de/en/academics/courses/cs_450_programming_course


In [91]:
mmds_courses.head()

Unnamed: 0,code,Name of Module,Offered,Language,ECTS,moduleCategory,courseURI
0,CS 450,Programming Course,HWS,E,6,Fundamentals,https://www.uni-mannheim.de/en/academics/cours...
1,CS 460,Database Technology,FSS,E,6,Fundamentals,https://www.uni-mannheim.de/en/academics/cours...
2,CS 470,Python for Data Scientists,FSS,E,6,Fundamentals,https://www.uni-mannheim.de/en/academics/cours...
6,AC 651,Additional Course – Data Manage- ment,HWS/FSS,E,AC 651*,Data Management,https://www.uni-mannheim.de/en/academics/cours...
7,CS 500,Advanced Software Engineering,HWS,E,6,Data Management,https://www.uni-mannheim.de/en/academics/cours...


In [149]:
mmds_course_data = pd.read_csv('./mmds_course_data.csv')
mmds_course_data.head()

Unnamed: 0,module_code,module_name,property,property_value
0,CS 500,Advanced Software Engineering,Admission requirements,"nan,-"
1,CS 500,Advanced Software Engineering,Aim of module,"aspects of software systems/components. Key examples,include structural views represented using class diagrams,,operational views expressed using constraint languages and,behavioral views expressed using state diagrams. An,important focus of the course is the use of these views to,define tests and extra-functional properties.,Expertise:,After taking the course, students will be familiar with the latest,state-of-the-art techniques for specifying the externally visible,properties of a software system/component – that is, for"
2,CS 500,Advanced Software Engineering,Duration of assessment,90 minutes
3,CS 500,Advanced Software Engineering,Duration of module,1 Semester
4,CS 500,Advanced Software Engineering,ECTS,"6,Hours per semester present: 56 h (4 SWS),Self-study: 112 h per semester"


In [124]:
len(mmds_course_data)

622

In [290]:
#sample
mmds_course_data[(mmds_course_data.module_code=='IE 672')]

Unnamed: 0,module_code,module_name,property,property_value
350,IE 672,Data Mining II,Admission requirements,"nan,Project report and oral presentation"
351,IE 672,Data Mining II,Aim of module,"• Data Preprocessing,• Dimensionality Reduction,• Anomaly Detection,• Time Series Analysis,• Parameter Tuning,• Ensemble Learning,Expertise:,Students will acquire knowledge of advanced techniques and,applications of data mining.,(MK2, MF1,MF3),Methodological competence:,• Successful participants will be able to address advanced"
352,IE 672,Data Mining II,Duration of assessment,60 minutes
353,IE 672,Data Mining II,Duration of module,1 semester
354,IE 672,Data Mining II,ECTS,"6,Hours per semester: 56 h (4 SWS)"
355,IE 672,Data Mining II,Form of assessment,Written examination
356,IE 672,Data Mining II,Form of module,Lecture with exercises and project
357,IE 672,Data Mining II,Further modules,"-,M.Sc. Wirtschaftsinformatik, M.Sc. Mannheim Master in Data"
358,IE 672,Data Mining II,Language,English
359,IE 672,Data Mining II,Learning outcomes and,"issues in data mining projects, conduct complex projects"


In [237]:
#Nulls in property_value column
len(mmds_course_data[mmds_course_data.property_value.isna()])

18

In [166]:
# Property selection: tally how often each property occurs in the course data
# and keep the ones that appear more than 16 times as candidates.
mmds_course_property = (
    mmds_course_data['property']
    .value_counts()
    .rename_axis('property')
    .reset_index(name='counts')
    .copy()
    .loc[lambda tally: tally['counts'] > 16]
)
mmds_course_property

Unnamed: 0,property,counts
0,Level,25
1,Duration of assessment,25
2,Prerequisites,25
3,ECTS,25
4,Form of assessment,25
5,Person in charge,25
6,Aim of module,25
7,Workload,24
8,Offering,24
9,Literature,24


In [167]:
list(mmds_course_property.property)

['Level',
 'Duration of assessment',
 'Prerequisites',
 'ECTS',
 'Form of assessment',
 'Person in charge',
 'Aim of module',
 'Workload',
 'Offering',
 'Literature',
 'Lecturer',
 'Type of module',
 'Language',
 'Further modules',
 'Form of module',
 'Duration of module',
 'Semester',
 'Methods',
 'Range of application',
 'Media',
 'Admission requirements',
 'for assessment']

In [275]:
# Hand-picked properties that are carried over into the graph.
property_list = [
    'Duration of assessment',
    'Prerequisites',
    'ECTS',
    'Form of assessment',
    'Aim of module',
    'Workload',
    'Offering',
    'Lecturer',
    'Language',
    'Duration of module',
    'Semester',
    'Methods',
    'Range of application',
    'Admission requirements',
]
# Restrict the course data to rows describing one of the selected properties.
mmds_course_data_ = mmds_course_data.loc[mmds_course_data['property'].isin(property_list)].copy()

In [276]:
# Course URI: lower-cased "<code> <name>" minted inside the course namespace.
mmds_course_data_['courseURI'] = (
    mmds_course_data_.module_code.str.lower() + ' ' + mmds_course_data_.module_name.str.lower()
).apply(lambda x: create_uri(x, courseURL))

# Property URI: lower-cased property label minted inside the property namespace.
mmds_course_data_['propertyURI'] = mmds_course_data_.property.str.lower().apply(
    lambda x: create_uri(x, propertyURL)
)

# Professor URI: for 'Lecturer' rows the value (a name) is replaced by a URI
# in the professor namespace.
lecturere_mask = mmds_course_data_.property=='Lecturer'
# Drop the dots (e.g. the ones in titles such as "Prof.") before minting the URI.
# regex=False is essential: as a regular expression '.' matches *every*
# character, so with regex=True pandas >= 2.0 would blank out the whole name.
mmds_course_data_.loc[lecturere_mask,'property_value'] = (
    mmds_course_data_.loc[lecturere_mask,'property_value'].str.replace('.', '', regex=False)
)
# NOTE(review): a value listing several lecturers separated by ';' is turned
# into ONE combined URI here - confirm whether such rows should be split first.
mmds_course_data_.loc[lecturere_mask,'property_value'] = (
    mmds_course_data_.loc[lecturere_mask,'property_value'].apply(lambda x: create_uri(x,professorURL))
)

In [244]:
mmds_course_data_.head()

Unnamed: 0,module_code,module_name,property,property_value,courseURI,propertyURI
0,CS 500,Advanced Software Engineering,Admission requirements,"nan,-",https://www.uni-mannheim.de/en/academics/courses/cs_500_advanced_software_engineering,https://www.uni-mannheim.de/en/academics/property/admission_requirements
1,CS 500,Advanced Software Engineering,Aim of module,"aspects of software systems/components. Key examples,include structural views represented using class diagrams,,operational views expressed using constraint languages and,behavioral views expressed using state diagrams. An,important focus of the course is the use of these views to,define tests and extra-functional properties.,Expertise:,After taking the course, students will be familiar with the latest,state-of-the-art techniques for specifying the externally visible,properties of a software system/component – that is, for",https://www.uni-mannheim.de/en/academics/courses/cs_500_advanced_software_engineering,https://www.uni-mannheim.de/en/academics/property/aim_of_module
2,CS 500,Advanced Software Engineering,Duration of assessment,90 minutes,https://www.uni-mannheim.de/en/academics/courses/cs_500_advanced_software_engineering,https://www.uni-mannheim.de/en/academics/property/duration_of_assessment
3,CS 500,Advanced Software Engineering,Duration of module,1 Semester,https://www.uni-mannheim.de/en/academics/courses/cs_500_advanced_software_engineering,https://www.uni-mannheim.de/en/academics/property/duration_of_module
4,CS 500,Advanced Software Engineering,ECTS,"6,Hours per semester present: 56 h (4 SWS),Self-study: 112 h per semester",https://www.uni-mannheim.de/en/academics/courses/cs_500_advanced_software_engineering,https://www.uni-mannheim.de/en/academics/property/ects


## Build Graph

In [329]:
# Create the (initially empty) graph that all later cells add triples to.
g = Graph()
# Register prefix names for the namespaces used in this graph.
g.bind("foaf", FOAF)
g.bind("courseKG", baseURL)
g.bind("prop",propertyURL)
g.bind("owl",OWL)
g.bind("xsd",XSD)

In [330]:
# Distinct property URIs present in the selected course data.
propertyURI = mmds_course_data_['propertyURI'].unique().tolist()
propertyURI

[rdflib.term.URIRef('https://www.uni-mannheim.de/en/academics/property/admission_requirements'),
 rdflib.term.URIRef('https://www.uni-mannheim.de/en/academics/property/aim_of_module'),
 rdflib.term.URIRef('https://www.uni-mannheim.de/en/academics/property/duration_of_assessment'),
 rdflib.term.URIRef('https://www.uni-mannheim.de/en/academics/property/duration_of_module'),
 rdflib.term.URIRef('https://www.uni-mannheim.de/en/academics/property/ects'),
 rdflib.term.URIRef('https://www.uni-mannheim.de/en/academics/property/form_of_assessment'),
 rdflib.term.URIRef('https://www.uni-mannheim.de/en/academics/property/language'),
 rdflib.term.URIRef('https://www.uni-mannheim.de/en/academics/property/lecturer'),
 rdflib.term.URIRef('https://www.uni-mannheim.de/en/academics/property/methods'),
 rdflib.term.URIRef('https://www.uni-mannheim.de/en/academics/property/offering'),
 rdflib.term.URIRef('https://www.uni-mannheim.de/en/academics/property/prerequisites'),
 rdflib.term.URIRef('https://www.u

In [331]:
# Distinct lecturer URIs: the values of the rows flagged by `lecturere_mask`.
lecturerURI = mmds_course_data_.loc[lecturere_mask, 'property_value'].unique().tolist()
lecturerURI

[rdflib.term.URIRef('https://www.uni-mannheim.de/en/academics/professor/Prof_Dr_Colin_Atkinson'),
 rdflib.term.URIRef('https://www.uni-mannheim.de/en/academics/professor/Guido_Moerkotte'),
 rdflib.term.URIRef('https://www.uni-mannheim.de/en/academics/professor/Prof_Dr_Matthias_Krause'),
 rdflib.term.URIRef('https://www.uni-mannheim.de/en/academics/professor/Prof_Dr_Rainer_Gemulla'),
 rdflib.term.URIRef('https://www.uni-mannheim.de/en/academics/professor/Prof_Dr_Frederik_Armknecht'),
 rdflib.term.URIRef('https://www.uni-mannheim.de/en/academics/professor/Junior_Professor_Dr_Roland_Lei%C3%9Fa'),
 rdflib.term.URIRef('https://www.uni-mannheim.de/en/academics/professor/Prof_Dr_Heiko_Paulheim%3B_Prof_Dr_Christian_Bizer'),
 rdflib.term.URIRef('https://www.uni-mannheim.de/en/academics/professor/Prof_Dr_Heiner_Stuckenschmidt'),
 rdflib.term.URIRef('https://www.uni-mannheim.de/en/academics/professor/Prof_Dr_Heiko_Paulheim'),
 rdflib.term.URIRef('https://www.uni-mannheim.de/en/academics/professor

In [332]:
# ------------------------------------------------------------------ T-Box
# Classes and their hierarchy.
for _triple in (
    (baseURL.programs, RDF.type, RDFS.Class),
    (baseURL.module, RDFS.subClassOf, baseURL.programs),
    (baseURL.course, RDF.type, RDFS.Class),
    (baseURL.people, RDF.type, RDFS.Class),
    (baseURL.people, RDFS.subClassOf, FOAF.Person),
    (baseURL.professor, RDFS.subClassOf, baseURL.people),
):
    g.add(_triple)

# Predicates derived from the course data: each one is first declared as a
# string-valued datatype property whose domain is `course`.
for prop in propertyURI:
    g.add((prop, RDF.type, OWL.DatatypeProperty))
    g.add((prop, RDFS.domain, baseURL.course))
    g.add((prop, RDFS.range, XSD.string))

# `lecturer` links a course to a professor resource, so the declarations made
# by the loop above are replaced: g.set() overwrites the existing value for
# the given subject/predicate pair instead of adding a second one.
for _predicate, _value in (
    (RDF.type, OWL.ObjectProperty),
    (RDFS.domain, baseURL.course),
    (RDFS.range, baseURL.professor),
):
    g.set((propertyURL.lecturer, _predicate, _value))

# Hand-declared predicates as (property, kind, domain, range):
# isCourseOf, isModuleOf, courseCode and courseName.
for _property, _kind, _domain, _range in (
    (propertyURL.isCourseOf, OWL.ObjectProperty, baseURL.course, baseURL.module),
    (propertyURL.isModuleOf, OWL.ObjectProperty, baseURL.module, baseURL.programs),
    (propertyURL.courseCode, OWL.DatatypeProperty, baseURL.course, XSD.string),
    (propertyURL.courseName, OWL.DatatypeProperty, baseURL.course, XSD.string),
):
    g.add((_property, RDF.type, _kind))
    g.add((_property, RDFS.domain, _domain))
    g.add((_property, RDFS.range, _range))

# ------------------------------------------------------------------ A-Box
# The MMDS program itself.
g.add((URIRef(mmdsURL), RDF.type, baseURL.programs))

# One instance per MMDS module.
for moduleREF in mmds_modules.moduleURI:
    g.add((URIRef(moduleREF), RDF.type, baseURL.module))

# One instance per MMDS course.
for courseREF in mmds_courses.courseURI:
    g.add((URIRef(courseREF), RDF.type, baseURL.course))

# One instance per lecturer URI.
for profURI in lecturerURI:
    g.add((profURI, RDF.type, baseURL.professor))


In [333]:
#function to add literal and object instances to the graph
def add_course_details(course_data):
    """Add one triple per row of `course_data` to the graph `g`.

    `course_data` must provide the columns `courseURI` (subject),
    `propertyURI` (predicate), `property` and `property_value`.
    Rows whose value is null are skipped. For rows whose `property` is
    'Lecturer' the value is used directly as the object of the triple;
    every other value is wrapped in a `Literal`.
    """
    # reset_index() returns a new frame, so the caller's frame is left untouched.
    records = course_data.reset_index()

    for sub, prop, val, prop_name in zip(
        records['courseURI'],
        records['propertyURI'],
        records['property_value'],
        records['property'],
    ):
        if pd.isnull(val):
            # nothing to assert for a missing value
            continue
        # lecturers are linked as resources, everything else as a literal
        obj = val if prop_name == 'Lecturer' else Literal(val)
        g.add((sub, prop, obj))
        

In [334]:
#Adding course details
add_course_details(mmds_course_data_)

In [335]:
#linking courses with modules
def link_courses_modules(mmds_courses):
    """Link every course to its module and attach its code and name.

    For each row of `mmds_courses` three triples are added to the graph `g`:
    `<course> isCourseOf <module>`, `<course> courseCode "<code>"` and
    `<course> courseName "<name>"`. The module URI is minted from the row's
    `moduleCategory` inside the MMDS module namespace.

    `mmds_courses` must provide the columns `courseURI`, `Name of Module`,
    `moduleCategory` and the course code in either `module_code` or `code`.

    Raises:
        KeyError: if neither `module_code` nor `code` (or any other required
            column) is present.
    """
    # The course frame built from MMDS_Courses.csv names the code column
    # "code"; looking up "module_code" unconditionally raises KeyError on it.
    # Accept both spellings so existing callers keep working.
    code_column = "module_code" if "module_code" in mmds_courses.columns else "code"

    for course_uri, course_code, course_name, category in zip(
        mmds_courses["courseURI"],
        mmds_courses[code_column],
        mmds_courses["Name of Module"],
        mmds_courses["moduleCategory"],
    ):
        module_uri = create_uri(category, mmdsModuleURL)
        # link the course to its module
        g.add((course_uri, propertyURL.isCourseOf, module_uri))
        # attach the course code and the course name as literals
        g.add((course_uri, propertyURL.courseCode, Literal(course_code)))
        g.add((course_uri, propertyURL.courseName, Literal(course_name)))

In [336]:
#linking courses with modules
link_courses_modules(mmds_courses)

In [337]:
# Linking modules with the MMDS program.
# Series.iteritems() was removed in pandas 2.0; iterating the Series directly
# yields the same URI values that the old (index, value) pairs carried.
for module_uri in mmds_modules.moduleURI:
    g.add((module_uri, propertyURL.isModuleOf, URIRef(mmdsURL)))

In [338]:
# Serialize the graph as Turtle and keep the result in `s` for optional
# inspection (see the commented-out print at the end of this cell).
s = g.serialize(format="ttl")
# Write the graph to courseKG.ttl as UTF-8 encoded Turtle; the trailing ';'
# suppresses the cell's output.
g.serialize(destination="courseKG.ttl", format="ttl", encoding="utf-8") ;

#print(s)

## SPARQL