### Creating the Medical Device Regulation knowledge database

The document is composed of a preface that sets out its intent, followed by chapters that each cover a specific topic.

<a href="https://eur-lex.europa.eu/legal-content/EN/TXT/HTML/?uri=CELEX:32017R0745">Medical Device Regulation website</a>

Creating a dictionary with a structure similar to that of the Jeopardy example in the <a href="https://learn.deeplearning.ai/vector-databases-embeddings-applications/lesson/5/vector-databases">Weaviate tutorial on DeepLearning.AI</a>.

The MDR dictionary will follow the structure below:

MDR_dict[element] = {

            'chapter': 'CHAPTER V, CLASSIFICATION AND CONFORMITY ASSESSMENT',

            'article': '51, classification of devices',

            'definition': '1, Devices shall be divided into classes I, IIa, IIb and III, taking into account the intended purpose of the devices and their inherent risks. Classification shall be carried out in accordance with Annex VIII.'

}

In [2]:

from urllib.request import urlopen
from bs4 import BeautifulSoup
import pandas as pd
import re

In [3]:
# Reading the MDR website
url = "https://eur-lex.europa.eu/legal-content/EN/TXT/HTML/?uri=CELEX:32017R0745"
# Use the response as a context manager so the HTTP connection is closed
# as soon as the body has been read, even if reading raises.
with urlopen(url) as page:
    html_bytes = page.read()
html = html_bytes.decode("utf-8")
# Parsing html information
doc = BeautifulSoup(html, 'html.parser')

### Inspecting HTML code

Identifying elements and keywords that contain useful information:
 * most definitions: class_="normal"
 * chapter number: class_="ti-section-1"
 * chapter text: class_="ti-section-2"
 * article number: class_="ti-art"
 * article text: class_="sti-art" 

In [4]:
# extracting chapter title, article title and information

# <p class="ti-section-2"> elements: chapter text (titles)
chapters = doc.find_all(["p"],class_=["ti-section-2"])
# <p class="sti-art"> elements: article text (titles)
articles = doc.find_all(["p"],class_=["sti-art"])
# <p class="normal"> elements: body paragraphs holding most definitions
information = doc.find_all(["p"],class_="normal")

In [5]:

# Spot-check one element of each list: the first chapter title, the first
# article title and body paragraph number 212.
ch_idx, art_idx, inf_idx = 0, 0, 212
print(chapters[ch_idx].get_text())
print(articles[art_idx].get_text())
print(information[inf_idx].get_text())


SCOPE AND DEFINITIONS

Subject matter and scope
1.   This Regulation lays down rules concerning the placing on the market, making available on the market or putting into service of medical devices for human use and accessories for such devices in the Union. This Regulation also applies to clinical investigations concerning such medical devices and accessories conducted in the Union.


In [6]:
# Dump the text of every "normal" paragraph to a plain-text file for manual
# inspection. The encoding is explicit because the scraped text may contain
# non-ASCII characters that the platform's default encoding cannot store.
with open("hello.txt", 'w', encoding="utf-8") as my_file:
    # Each paragraph is followed by the same " \n " separator as before.
    my_file.writelines(paragraph.get_text() + " \n " for paragraph in information)

In [7]:
# Creating a csv file with all the information (one paragraph per row), so
# that specific parts can be cut and pasted.
import csv

# newline='' is what the csv module expects for files it writes; the explicit
# encoding avoids depending on the platform default for non-ASCII text.
with open('MDR.csv', 'w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    # Every row has a single column holding the paragraph text.
    writer.writerows([paragraph.get_text()] for paragraph in information)

In [8]:
# Functions to manipulate and add elements to the dictionary

def add_loop_dictionary(MDR_dict, chapter_text, article_text, information, iterStart, iterEnd, iterStep, append_text="",startAt=0):
    """Add one dictionary entry per selected paragraph.

    Visits ``information[iterStart]`` up to and including
    ``information[iterEnd]`` in steps of ``iterStep``. The text of each entry
    is ``append_text`` (followed by one space when it is non-empty) plus the
    paragraph text, with the first ``startAt`` characters of that combined
    string removed. Entries are stored through ``add_dictionary`` under
    consecutive integer keys starting at ``len(MDR_dict)``.

    Returns the updated dictionary.
    """
    # A non-empty prefix is separated from the paragraph by a single space.
    prefix = append_text + " " if len(append_text) else append_text
    selected = range(iterStart, iterEnd + 1, iterStep)
    for key, position in enumerate(selected, start=len(MDR_dict)):
        # The slice is applied after the prefix has been prepended.
        entry_text = (prefix + information[position].get_text())[startAt:]
        MDR_dict = add_dictionary(MDR_dict, key, chapter_text, article_text, entry_text)
    return MDR_dict

def add_lines_dictionary(MDR_dict, chapter_text, article_text, information, iterStart, iterEnd, iterStep, append_text="", startAt=0):
    """Merge several paragraphs into a single dictionary entry.

    The texts of ``information[iterStart]`` up to and including
    ``information[iterEnd]`` (in steps of ``iterStep``) are concatenated with
    no separator. ``append_text`` (followed by one space when it is non-empty)
    is put in front, and the first ``startAt`` characters of the combined
    string are removed. The result is stored through ``add_dictionary`` under
    the key ``len(MDR_dict)``.

    Returns the updated dictionary.
    """
    entry_key = len(MDR_dict)
    # A non-empty prefix is separated from the merged text by a single space.
    prefix = append_text + " " if len(append_text) else append_text
    merged = "".join(
        information[position].get_text()
        for position in range(iterStart, iterEnd + 1, iterStep)
    )
    entry_text = (prefix + merged)[startAt:]
    return add_dictionary(MDR_dict, entry_key, chapter_text, article_text, entry_text)

def add_dictionary(MDR_dict, index_dict, chapter_text, article_text, information_text):
    """Store one chapter/article/definition record under ``index_dict``.

    An existing entry with the same key is overwritten. The dictionary is
    modified in place and also returned.
    """
    record = dict(
        chapter=chapter_text,
        article=article_text,
        definition=information_text,
    )
    MDR_dict[index_dict] = record
    return MDR_dict

In [None]:
def include_chapter0(MDR_dict, information, chapters, articles):
    """Add the paragraphs that precede Chapter I to the dictionary.

    All entries share a fixed chapter label and article label. The paragraph
    index ranges are hard-coded for the page layout that was inspected.
    ``chapters`` and ``articles`` are accepted for a uniform signature but
    are not used here.

    Returns the updated dictionary.
    """
    chapter_text = 'about the medical device regulation'
    article_text = 'aims of the document'

    # Every second paragraph from 9 to 201 becomes its own entry
    # (presumably the skipped paragraphs are numbering labels - TODO confirm).
    MDR_dict = add_loop_dictionary(MDR_dict, chapter_text, article_text, information, 9, 201, 2)

    # Paragraphs 203 and 204 are merged into one entry.
    MDR_dict = add_lines_dictionary(MDR_dict, chapter_text, article_text, information, 203, 204, 1)

    # Paragraphs 206, 208 and 210 become one entry each.
    MDR_dict = add_loop_dictionary(MDR_dict, chapter_text, article_text, information, 206, 211, 2)

    return MDR_dict


def include_chapterI(MDR_dict, information, chapters, articles):
    """Add the paragraphs of Chapter I, Article 1 to the dictionary.

    The chapter label is the text of ``chapters[0]`` and the article label is
    the text of ``articles[0]``. Paragraph index ranges are hard-coded for the
    page layout that was inspected. The last positional argument of each
    helper call is the number of leading characters removed from the entry
    text (5 matches a prefix such as "1.   ").

    Returns the updated dictionary.
    """
    chapter_text = chapters[0].get_text()
    article_text = articles[0].get_text()

    # Arguments shared by every helper call below.
    common = (chapter_text, article_text, information)

    # Paragraph 212 on its own.
    MDR_dict = add_loop_dictionary(MDR_dict, *common, 212, 212, 1, "", 5)

    # Paragraphs 213-216 merged into one entry.
    MDR_dict = add_lines_dictionary(MDR_dict, *common, 213, 216, 1, "", 5)

    # Paragraphs 217-219, one entry each.
    MDR_dict = add_loop_dictionary(MDR_dict, *common, 217, 219, 1, "", 5)

    # Paragraph 220 is the lead-in prepended to every second paragraph from
    # 222 to 238; the 5 removed characters therefore come off the lead-in.
    # These are dictionary indexes 106 to 114 when this function runs right
    # after include_chapter0.
    append_text = information[220].get_text()
    MDR_dict = add_loop_dictionary(MDR_dict, *common, 222, 238, 2, append_text, 5)

    # Paragraph 239 on its own.
    MDR_dict = add_loop_dictionary(MDR_dict, *common, 239, 239, 1, "", 5)

    # Three pairs of paragraphs, each pair merged into one entry.
    MDR_dict = add_lines_dictionary(MDR_dict, *common, 240, 241, 1, "", 5)
    MDR_dict = add_lines_dictionary(MDR_dict, *common, 242, 243, 1, "", 5)
    MDR_dict = add_lines_dictionary(MDR_dict, *common, 244, 245, 1, "", 5)

    # From here on 6 leading characters are removed instead of 5 (presumably
    # because the paragraph numbers become two-digit - TODO confirm).
    # By count this merged entry is dictionary index 119 when this function
    # runs right after include_chapter0.
    MDR_dict = add_lines_dictionary(MDR_dict, *common, 246, 247, 1, "", 6)

    # Paragraphs 248-251, one entry each.
    MDR_dict = add_loop_dictionary(MDR_dict, *common, 248, 251, 1, "", 6)

    # TODO: article 2 is not included yet. The disabled draft used
    # article_text = articles[1].get_text(), information[252] as the lead-in
    # text, iterStart = 254, iterStep = 2 and startAt = 5; its iterEnd was
    # still 238 (lower than iterStart), so the end index remains to be chosen.

    return MDR_dict


def include_chapterII(MDR_dict, information, chapters, articles):
    """Placeholder for Chapter II: adds nothing and returns ``MDR_dict`` as is."""
    return MDR_dict


def include_chapterIII(MDR_dict, information, chapters, articles):
    """Placeholder for Chapter III: adds nothing and returns ``MDR_dict`` as is."""
    return MDR_dict


def include_chapterIV(MDR_dict, information, chapters, articles):
    """Placeholder for Chapter IV: adds nothing and returns ``MDR_dict`` as is."""
    return MDR_dict


def include_chapterV(MDR_dict, information, chapters, articles):
    """Placeholder for Chapter V: adds nothing and returns ``MDR_dict`` as is."""
    return MDR_dict


def include_chapterVI(MDR_dict, information, chapters, articles):
    """Placeholder for Chapter VI: adds nothing and returns ``MDR_dict`` as is."""
    return MDR_dict


def include_chapterVII(MDR_dict, information, chapters, articles):
    """Placeholder for Chapter VII: adds nothing and returns ``MDR_dict`` as is."""
    return MDR_dict


def include_chapterVIII(MDR_dict, information, chapters, articles):
    """Placeholder for Chapter VIII: adds nothing and returns ``MDR_dict`` as is."""
    return MDR_dict


def include_chapterIX(MDR_dict, information, chapters, articles):
    """Placeholder for Chapter IX: adds nothing and returns ``MDR_dict`` as is."""
    return MDR_dict


def include_chapterX(MDR_dict, information, chapters, articles):
    """Placeholder for Chapter X: adds nothing and returns ``MDR_dict`` as is."""
    return MDR_dict

In [None]:
def creating_MDR_dict(information, chapters, articles):
    """Build the MDR dictionary by running every chapter builder in order.

    Starts from an empty dictionary and passes it, together with the parsed
    paragraph, chapter and article lists, through ``include_chapter0`` and
    then ``include_chapterI`` to ``include_chapterX``.

    Returns the resulting dictionary.
    """
    # Order matters: each builder keys its entries from the current length.
    chapter_builders = (
        include_chapter0,
        include_chapterI,
        include_chapterII,
        include_chapterIII,
        include_chapterIV,
        include_chapterV,
        include_chapterVI,
        include_chapterVII,
        include_chapterVIII,
        include_chapterIX,
        include_chapterX,
    )
    MDR_dict = {}
    for build_chapter in chapter_builders:
        MDR_dict = build_chapter(MDR_dict, information, chapters, articles)
    return MDR_dict

In [None]:
# Smoke test: build the dictionary and report how many entries it holds.
testDict = creating_MDR_dict(information, chapters, articles)
print(len(testDict))
# With the index ranges currently hard-coded in include_chapter0 and
# include_chapterI the dictionary has 124 entries, so 123 is the last one.
print(testDict[123]['definition'])

# Look at a raw paragraph beyond the last one included so far (251), to
# decide how it should be added next.
info_test = information[254].get_text()
print(info_test)
# Candidate number of leading characters to strip; only used by the
# disabled print below.
startIn = 6
# print(info_test[startIn:])

In [None]:
import json

# Build the complete dictionary from the parsed page.
MDR_dictionary = creating_MDR_dict(information, chapters, articles)

# Serialise the dictionary and write it to disk. The json module writes the
# integer dictionary keys as strings.
with open("MDR_dictionary.json", "w") as outfile:
    outfile.write(json.dumps(MDR_dictionary))
