In [1]:
# general imports
import os
import coreferee
import re
import spacy
import pandas as pd
# own path/ class imports
from file_paths import *

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
## Application Selection ########################################START
# choose method 
direct_s_bert = True #if True --> no clustering or other means are implemented, all sentences are comapred with each other via S-Bert
legal_s_bert = False #if True --> implementation like S-BERT but based on legal BERT instead of BERT
clustering = False #if True --> 2 approaches calculated: a) topic model + word2vec + cosine sim; b) bert embeddings + kmeans and word2vev + cosine sim
key_phrase = False # if True --> setp one is performed on only key phrases (identified by tfidf), instead of whole sentences
# choose case study
### GDPR adjusted, ISO not!
iso = False #if False --> running with gdpr setup
# choose set up
rea_only_signal = False #if False --> gdpr realization input is not filtered to contain only sentences with signalwords
# choose thresholds:
gamma_s_bert = 0.7 #0.67 #used for sentence mapping 
gamma_grouping = 0.9 #used for sentence mapping in k-means & topic Model approach
gamma_key_phrase = 0.92 #used for key phrase extraction
gamma_one = 0.26 #used for subject phrase mapping
gamma_two = 0.23 #used for verb phrase mapping
gamma_three = 0.2 #used for object phrase mapping
################################################################# END

In [3]:
# Create the nlp object
nlp = spacy.load('en_core_web_trf')
nlp.add_pipe('coreferee', config={}) # resolves coreferences

<coreferee.manager.CorefereeBroker at 0x7fa8123b4a30>

In [4]:
## parse defined lists of constraint signalwords, sequencemarkers and stopwords ########################### START
def read_defined_lists(directory): 
  '''reads in defined txts of constraint signalwords, sequencemarkers and stopwords as lists
  Input: .txt
  Output: list'''
  try:
    with open(directory) as f:
      defined_list = f.read().splitlines()
  except FileNotFoundError:
      print("Wrong file or file path.")
      quit()
  return defined_list

if iso:
  signalwords = read_defined_lists(ISO_SIGNALWORDS)
  ISMS_words = read_defined_lists(ISO_REA_SPEZIFICATION1)
  top_management_words = read_defined_lists(ISO_REA_SPEZIFICATION2)
else:
  signalwords = read_defined_lists(GDPR_SIGNALWORDS)
  controller_words = read_defined_lists(GDPR_REA_SPEZIFICATION1)
  data_protection_officer_words = read_defined_lists(GDPR_REA_SPEZIFICATION2)
  management_words = read_defined_lists(GDPR_REA_SPEZIFICATION3)

################################################################# END

In [5]:

## parse documents ############################################ START
def read_documents(directory): 
  '''reads in txts of regulatory and realization documents
  Input: multiple .txt (each a document article)
  Output: dictionary with article name as key and article text as value'''
  doc_dict = dict()
  files = os.listdir(directory)
  try:
    for fi in files:
        if fi.endswith('.txt'):
          with open(directory+'/'+fi,'r') as f:
              doc_dict[re.sub('\.txt', '', fi)] = f.read()
  except FileNotFoundError:
    print("Wrong file or file path to dir.")
    quit()
  return doc_dict

# reading the raw .txt text
if iso:
  reg_paragraphs = read_documents(ISO_REGULATION_INPUT_DIRECTORY) 
  rea_paragraphs = read_documents(ISO_REALIZATION_INPUT_DIRECTORY) 

else: 
  reg_paragraphs = read_documents(GDPR_REGULATION_INPUT_DIRECTORY) 
  rea_paragraphs = read_documents(GDPR_REALIZATION_INPUT_DIRECTORY) 
################################################################# END

In [6]:
df = pd.DataFrame(reg_paragraphs.items(), columns=['reg_title', 'reg_text'])

In [7]:
def apply_coreference_resolution(text):
    doc = nlp(text)
    # split text in tokens
    list_tokens = list(token.text_with_ws for token in doc)
    for index, _ in enumerate(list_tokens):
        #check if token an identified coreference token
        if None != doc._.coref_chains.resolve(doc[index]):
            new_token = ""
            #extract those tokens that are identified via index by coreferee and replace with best refrence token
            for resolved_token in doc._.coref_chains.resolve(doc[index]):
                new_token = new_token + resolved_token.text + " "
                list_tokens[index] = new_token
    resolved_text = "".join(list_tokens)
    return resolved_text

In [8]:
df['reg_text_resolved'] = df.apply(lambda row : apply_coreference_resolution(row['reg_text']), axis = 1)

In [9]:
pd.options.display.max_colwidth= 2000

In [10]:
def clean_text(text):  
    '''cleans texts'''
    cleaned_text = text.replace(";", ".") #in reg there are many ; which should be counted as seperate senteces
    cleaned_text = cleaned_text.replace("or\n\n\n", "")
    cleaned_text = cleaned_text.replace("or\n\n", "")
    cleaned_text = cleaned_text.replace("and\n\n\n", "")
    cleaned_text = cleaned_text.replace("and\n\n", "")
    cleaned_text = cleaned_text.replace("\n\n\n", "")
    cleaned_text = cleaned_text.replace("\n\n", "")
    cleaned_text = cleaned_text.replace("\n \n", "")
    cleaned_text = cleaned_text.replace("\n", "")
    return cleaned_text 

In [11]:
df['reg_text_cleaned'] = df.apply(lambda row : clean_text(row['reg_text_resolved']), axis = 1)

In [12]:
df = df.drop(['reg_text_resolved'], axis=1)

In [13]:
def ensure_word_embeddings(text):
    '''delete words which are not in spacy vocab - would lead to problems later if not done''' 
    doc = nlp(text) 
    new_para = text
    for token in doc:
        if nlp.vocab.has_vector("token.text"):
            continue
        else:
            new_para = new_para.replace("token.text", "")
    return new_para

In [14]:
df['reg_text_cleaned_2'] = df.apply(lambda row : ensure_word_embeddings(row['reg_text_cleaned']), axis = 1)
df.head()

Unnamed: 0,reg_title,reg_text,reg_text_cleaned,reg_text_cleaned_2
0,Lawfulness of processing,"Processing shall be lawful only if and to the extent that the data subject has given consent to the processing of his or her personal data for one or more specific purposes. \n\nProcessing shall be lawful only if and to the extent that processing is necessary for the performance of a contract to which the data subject is party or in order to take steps at the request of the data subject prior to entering into a contract. \n\nProcessing shall be lawful only if and to the extent that processing is necessary for compliance with a legal obligation to which the controller is subject. \n\nProcessing shall be lawful only if and to the extent that processing is necessary in order to protect the vital interests of the data subject or of another natural person. \n\nProcessing shall be lawful only if and to the extent that processing is necessary for the performance of a task carried out in the public interest or in the exercise of official authority vested in the controller. \n \nProcessing shall be lawful only if and to the extent that processing is necessary for the purposes of the legitimate interests pursued by the controller or by a third party, except where such interests are overridden by the interests or fundamental rights and freedoms of the data subject which require protection of personal data, in particular where the data subject is a child. \n\nPoint (f) of the first subparagraph shall not apply to processing carried out by public authorities in the performance of their tasks. \n\nMember States may maintain or introduce more specific provisions to adapt the application of the rules of this Regulation with regard to processing for compliance with points (c) and (e) of paragraph 1 by determining more precisely specific requirements for the processing and other measures to ensure lawful and fair processing including for other specific processing situations as provided for in Chapter IX. \n\nThe basis for the processing referred to in point (c) and (e) of paragra...","Processing shall be lawful only if and to the extent that the data subject has given consent to the processing of subject or her personal data for one or more specific purposes. Processing shall be lawful only if and to the extent that processing is necessary for the performance of a contract to which the data subject is party or in order to take steps at the request of the data subject prior to entering into a contract. Processing shall be lawful only if and to the extent that processing is necessary for compliance with a legal obligation to which the controller is subject. Processing shall be lawful only if and to the extent that processing is necessary in order to protect the vital interests of the data subject or of another natural person. Processing shall be lawful only if and to the extent that processing is necessary for the performance of a task carried out in the public interest or in the exercise of official authority vested in the controller . Processing shall be lawful only if and to the extent that processing is necessary for the purposes of the legitimate interests pursued by the controller or by a third party, except where such interests are overridden by the interests or fundamental rights and freedoms of the data subject which require protection of personal data, in particular where the data subject is a child. Point (f) of the first subparagraph shall not apply to processing carried out by public authorities in the performance of child tasks. Member States may maintain or introduce more specific provisions to adapt the application of the rules of this Regulation with regard to processing for compliance with points (c) and (e) of paragraph 1 by determining more precisely specific requirements for the processing and other measures to ensure lawful and fair processing including for other specific processing situations as provided for in Chapter IX. The basis for the processing referred to in point (c) and (e) of paragraph 1 shall be laid down by U...","Processing shall be lawful only if and to the extent that the data subject has given consent to the processing of subject or her personal data for one or more specific purposes. Processing shall be lawful only if and to the extent that processing is necessary for the performance of a contract to which the data subject is party or in order to take steps at the request of the data subject prior to entering into a contract. Processing shall be lawful only if and to the extent that processing is necessary for compliance with a legal obligation to which the controller is subject. Processing shall be lawful only if and to the extent that processing is necessary in order to protect the vital interests of the data subject or of another natural person. Processing shall be lawful only if and to the extent that processing is necessary for the performance of a task carried out in the public interest or in the exercise of official authority vested in the controller . Processing shall be lawful only if and to the extent that processing is necessary for the purposes of the legitimate interests pursued by the controller or by a third party, except where such interests are overridden by the interests or fundamental rights and freedoms of the data subject which require protection of personal data, in particular where the data subject is a child. Point (f) of the first subparagraph shall not apply to processing carried out by public authorities in the performance of child tasks. Member States may maintain or introduce more specific provisions to adapt the application of the rules of this Regulation with regard to processing for compliance with points (c) and (e) of paragraph 1 by determining more precisely specific requirements for the processing and other measures to ensure lawful and fair processing including for other specific processing situations as provided for in Chapter IX. The basis for the processing referred to in point (c) and (e) of paragraph 1 shall be laid down by U..."
1,Processor,"Where processing is to be carried out on behalf of a controller, the controller shall use only processors providing sufficient guarantees to implement appropriate technical and organisational measures in such a manner that processing will meet the requirements of this Regulation and ensure the protection of the rights of the data subject.\n\nThe processor shall not engage another processor without prior specific or general written authorisation of the controller. \n\nIn the case of general written authorisation, the processor shall inform the controller of any intended changes concerning the addition or replacement of other processors, thereby giving the controller the opportunity to object to such changes. \n\nProcessing by a processor shall be governed by a contract or other legal act under Union or Member State law, that is binding on the processor with regard to the controller and that sets out the subject-matter and duration of the processing, the nature and purpose of the processing, the type of personal data and categories of data subjects and the obligations and rights of the controller. \n\nThat contract or other legal act shall stipulate, in particular, that the processor processes the personal data only on documented instructions from the controller, including with regard to transfers of personal data to a third country or an international organisation, unless required to do so by Union or Member State law to which the processor is subject. \n\nin such a case, the processor shall inform the controller of that legal requirement before processing, unless that law prohibits such information on important grounds of public interest. \n\nThat contract or other legal act shall stipulate, in particular, that the processor ensures that persons authorised to process the personal data have committed themselves to confidentiality or are under an appropriate statutory obligation of confidentiality. \n\nThat contract or other legal act shall stipulate, in particul...","Where processing is to be carried out on behalf of a controller, the controller shall use only processors providing sufficient guarantees to implement appropriate technical and organisational measures in such a manner that processing will meet the requirements of this Regulation and ensure the protection of the rights of the data subject.The processor shall not engage another processor without prior specific or general written authorisation of the controller . In the case of general written authorisation, the processor shall inform the controller of any intended changes concerning the addition or replacement of other processors, thereby giving the controller the opportunity to object to such changes. Processing by a processor shall be governed by a contract or other legal act under Union or Member State law, that is binding on the processor with regard to the controller and that sets out the subject-matter and duration of the processing, the nature and purpose of the processing , the type of personal data and categories of data subjects and the obligations and rights of the controller . That contract or other legal act shall stipulate, in particular, that the processor processes the personal data only on documented instructions from the controller , including with regard to transfers of personal data to a third country or an international organisation, unless required to do so by Union or Member State law to which the processor is subject. in such a case, the processor shall inform the controller of that legal requirement before processing, unless that law prohibits such information on important grounds of public interest. That contract or other legal act shall stipulate, in particular, that the processor ensures that persons authorised to process the personal data have committed persons to confidentiality or are under an appropriate statutory obligation of confidentiality. That contract or other legal act shall stipulate, in particular, that the processor take...","Where processing is to be carried out on behalf of a controller, the controller shall use only processors providing sufficient guarantees to implement appropriate technical and organisational measures in such a manner that processing will meet the requirements of this Regulation and ensure the protection of the rights of the data subject.The processor shall not engage another processor without prior specific or general written authorisation of the controller . In the case of general written authorisation, the processor shall inform the controller of any intended changes concerning the addition or replacement of other processors, thereby giving the controller the opportunity to object to such changes. Processing by a processor shall be governed by a contract or other legal act under Union or Member State law, that is binding on the processor with regard to the controller and that sets out the subject-matter and duration of the processing, the nature and purpose of the processing , the type of personal data and categories of data subjects and the obligations and rights of the controller . That contract or other legal act shall stipulate, in particular, that the processor processes the personal data only on documented instructions from the controller , including with regard to transfers of personal data to a third country or an international organisation, unless required to do so by Union or Member State law to which the processor is subject. in such a case, the processor shall inform the controller of that legal requirement before processing, unless that law prohibits such information on important grounds of public interest. That contract or other legal act shall stipulate, in particular, that the processor ensures that persons authorised to process the personal data have committed persons to confidentiality or are under an appropriate statutory obligation of confidentiality. That contract or other legal act shall stipulate, in particular, that the processor take..."
2,Binding corporate rules,"The competent supervisory authority shall approve binding corporate rules in accordance with the consistency mechanism set out in Article 63, provided that they are legally binding and apply to and are enforced by every member concerned of the group of undertakings, or group of enterprises engaged in a joint economic activity, including their employees. \n\nThe competent supervisory authority shall approve binding corporate rules in accordance with the consistency mechanism set out in Article 63, provided that they expressly confer enforceable rights on data subjects with regard to the processing of their personal data. \n\nThe competent supervisory authority shall approve binding corporate rules in accordance with the consistency mechanism set out in Article 63, provided that they fulfil the requirements laid down in paragraph 2. \n\nThe binding corporate rules referred to in paragraph 1 shall specify at least the structure and contact details of the group of undertakings, or group of enterprises engaged in a joint economic activity and of each of its members. \n\nThe binding corporate rules referred to in paragraph 1 shall specify at least the data transfers or set of transfers, including the categories of personal data, the type of processing and its purposes, the type of data subjects affected and the identification of the third country or countries in question. \n\nThe binding corporate rules referred to in paragraph 1 shall specify at least their legally binding nature, both internally and externally. \n\nThe binding corporate rules referred to in paragraph 1 shall specify at least the application of the general data protection principles, in particular purpose limitation, data minimisation, limited storage periods, data quality, data protection by design and by default, legal basis for processing, processing of special categories of personal data, measures to ensure data security, and the requirements in respect of onward transfers to bodies not bound by ...","The competent supervisory authority shall approve binding corporate rules in accordance with the consistency mechanism set out in Article 63, provided that rules are legally binding and apply to and are enforced by every member concerned of the group of undertakings, or group of enterprises engaged in a joint economic activity, including rules employees. The competent supervisory authority shall approve binding corporate rules in accordance with the consistency mechanism set out in Article 63, provided that rules expressly confer enforceable rights on data subjects with regard to the processing of rules personal data. The competent supervisory authority shall approve binding corporate rules in accordance with the consistency mechanism set out in Article 63, provided that rules fulfil the requirements laid down in paragraph 2. The binding corporate rules referred to in paragraph 1 shall specify at least the structure and contact details of the group of undertakings, or group of enterprises engaged in a joint economic activity and of each of group members. The binding corporate rules referred to in paragraph 1 shall specify at least the data transfers or set of transfers, including the categories of personal data, the type of processing and type purposes, the type of data subjects affected and the identification of the third country or countries in question. The binding corporate rules referred to in paragraph 1 shall specify at least rules legally binding nature, both internally and externally. The binding corporate rules referred to in paragraph 1 shall specify at least the application of the general data protection principles, in particular purpose limitation, data minimisation, limited storage periods, data quality, data protection by design and by default, legal basis for processing, processing of special categories of personal data, measures to ensure data security, and the requirements in respect of onward transfers to bodies not bound by the binding corpor...","The competent supervisory authority shall approve binding corporate rules in accordance with the consistency mechanism set out in Article 63, provided that rules are legally binding and apply to and are enforced by every member concerned of the group of undertakings, or group of enterprises engaged in a joint economic activity, including rules employees. The competent supervisory authority shall approve binding corporate rules in accordance with the consistency mechanism set out in Article 63, provided that rules expressly confer enforceable rights on data subjects with regard to the processing of rules personal data. The competent supervisory authority shall approve binding corporate rules in accordance with the consistency mechanism set out in Article 63, provided that rules fulfil the requirements laid down in paragraph 2. The binding corporate rules referred to in paragraph 1 shall specify at least the structure and contact details of the group of undertakings, or group of enterprises engaged in a joint economic activity and of each of group members. The binding corporate rules referred to in paragraph 1 shall specify at least the data transfers or set of transfers, including the categories of personal data, the type of processing and type purposes, the type of data subjects affected and the identification of the third country or countries in question. The binding corporate rules referred to in paragraph 1 shall specify at least rules legally binding nature, both internally and externally. The binding corporate rules referred to in paragraph 1 shall specify at least the application of the general data protection principles, in particular purpose limitation, data minimisation, limited storage periods, data quality, data protection by design and by default, legal basis for processing, processing of special categories of personal data, measures to ensure data security, and the requirements in respect of onward transfers to bodies not bound by the binding corpor..."
3,Processing under the authority of the controller or processor,"The processor and any person acting under the authority of the controller or of the processor, who has access to personal data, shall not process those data except on instructions from the controller, unless required to do so by Union or Member State law.\n","The processor and any person acting under the authority of the controller or of the processor , who has access to personal data, shall not process those data except on instructions from the controller , unless required to do so by Union or Member State law.","The processor and any person acting under the authority of the controller or of the processor , who has access to personal data, shall not process those data except on instructions from the controller , unless required to do so by Union or Member State law."
4,Data protection by design and by default,"Taking into account the state of the art, the cost of implementation and the nature, scope, context and purposes of processing as well as the risks of varying likelihood and severity for rights and freedoms of natural persons posed by the processing, the controller shall, both at the time of the determination of the means for processing and at the time of the processing itself, implement appropriate technical and organisational measures, such as pseudonymisation, which are designed to implement data-protection principles, such as data minimisation, in an effective manner and to integrate the necessary safeguards into the processing in order to meet the requirements of this Regulation and protect the rights of data subjects. \n\nThe controller shall implement appropriate technical and organisational measures for ensuring that, by default, only personal data which are necessary for each specific purpose of the processing are processed. \n\nThat obligation applies to the amount of personal data collected, the extent of their processing, the period of their storage and their accessibility. \n\nIn particular, such measures shall ensure that by default personal data are not made accessible without the individual's intervention to an indefinite number of natural persons. \n\nAn approved certification mechanism pursuant to Article 42 may be used as an element to demonstrate compliance with the requirements set out in paragraphs 1 and 2 of this Article. \n","Taking into account the state of the art, the cost of implementation and the nature, scope, context and purposes of processing as well as the risks of varying likelihood and severity for rights and freedoms of natural persons posed by the processing, the controller shall, both at the time of the determination of the means for processing and at the time of the processing processing , implement appropriate technical and organisational measures, such as pseudonymisation, which are designed to implement data-protection principles, such as data minimisation, in an effective manner and to integrate the necessary safeguards into the processing in order to meet the requirements of this Regulation and protect the rights of data subjects. The controller shall implement appropriate technical and organisational measures for ensuring that, by default, only personal data which are necessary for each specific purpose of the processing are processed. That obligation applies to the amount of personal data collected, the extent of data processing, the period of data storage and data accessibility. In particular, such measures shall ensure that by default personal data are not made accessible without the individual's intervention to an indefinite number of natural persons. An approved certification mechanism pursuant to Article 42 may be used as an element to demonstrate compliance with the requirements set out in paragraphs 1 and 2 of this Article.","Taking into account the state of the art, the cost of implementation and the nature, scope, context and purposes of processing as well as the risks of varying likelihood and severity for rights and freedoms of natural persons posed by the processing, the controller shall, both at the time of the determination of the means for processing and at the time of the processing processing , implement appropriate technical and organisational measures, such as pseudonymisation, which are designed to implement data-protection principles, such as data minimisation, in an effective manner and to integrate the necessary safeguards into the processing in order to meet the requirements of this Regulation and protect the rights of data subjects. The controller shall implement appropriate technical and organisational measures for ensuring that, by default, only personal data which are necessary for each specific purpose of the processing are processed. That obligation applies to the amount of personal data collected, the extent of data processing, the period of data storage and data accessibility. In particular, such measures shall ensure that by default personal data are not made accessible without the individual's intervention to an indefinite number of natural persons. An approved certification mechanism pursuant to Article 42 may be used as an element to demonstrate compliance with the requirements set out in paragraphs 1 and 2 of this Article."


In [15]:
df = df.drop(['reg_text_cleaned'], axis=1)

In [16]:
def split_into_sent(text):
    doc = nlp(text) 
    sentences = doc.sents
    sent_list = []
    for sentence in sentences:
        sent_list.append(sentence.text.strip())
    return sent_list

In [18]:
df['reg_sent'] = df.apply(lambda row : split_into_sent(row['reg_text_cleaned_2']), axis = 1)
df.head()

Unnamed: 0,reg_title,reg_text,reg_text_cleaned_2,reg_sent
0,Lawfulness of processing,"Processing shall be lawful only if and to the extent that the data subject has given consent to the processing of his or her personal data for one or more specific purposes. \n\nProcessing shall be lawful only if and to the extent that processing is necessary for the performance of a contract to which the data subject is party or in order to take steps at the request of the data subject prior to entering into a contract. \n\nProcessing shall be lawful only if and to the extent that processing is necessary for compliance with a legal obligation to which the controller is subject. \n\nProcessing shall be lawful only if and to the extent that processing is necessary in order to protect the vital interests of the data subject or of another natural person. \n\nProcessing shall be lawful only if and to the extent that processing is necessary for the performance of a task carried out in the public interest or in the exercise of official authority vested in the controller. \n \nProcessing shall be lawful only if and to the extent that processing is necessary for the purposes of the legitimate interests pursued by the controller or by a third party, except where such interests are overridden by the interests or fundamental rights and freedoms of the data subject which require protection of personal data, in particular where the data subject is a child. \n\nPoint (f) of the first subparagraph shall not apply to processing carried out by public authorities in the performance of their tasks. \n\nMember States may maintain or introduce more specific provisions to adapt the application of the rules of this Regulation with regard to processing for compliance with points (c) and (e) of paragraph 1 by determining more precisely specific requirements for the processing and other measures to ensure lawful and fair processing including for other specific processing situations as provided for in Chapter IX. \n\nThe basis for the processing referred to in point (c) and (e) of paragra...","Processing shall be lawful only if and to the extent that the data subject has given consent to the processing of subject or her personal data for one or more specific purposes. Processing shall be lawful only if and to the extent that processing is necessary for the performance of a contract to which the data subject is party or in order to take steps at the request of the data subject prior to entering into a contract. Processing shall be lawful only if and to the extent that processing is necessary for compliance with a legal obligation to which the controller is subject. Processing shall be lawful only if and to the extent that processing is necessary in order to protect the vital interests of the data subject or of another natural person. Processing shall be lawful only if and to the extent that processing is necessary for the performance of a task carried out in the public interest or in the exercise of official authority vested in the controller . Processing shall be lawful only if and to the extent that processing is necessary for the purposes of the legitimate interests pursued by the controller or by a third party, except where such interests are overridden by the interests or fundamental rights and freedoms of the data subject which require protection of personal data, in particular where the data subject is a child. Point (f) of the first subparagraph shall not apply to processing carried out by public authorities in the performance of child tasks. Member States may maintain or introduce more specific provisions to adapt the application of the rules of this Regulation with regard to processing for compliance with points (c) and (e) of paragraph 1 by determining more precisely specific requirements for the processing and other measures to ensure lawful and fair processing including for other specific processing situations as provided for in Chapter IX. The basis for the processing referred to in point (c) and (e) of paragraph 1 shall be laid down by U...","[Processing shall be lawful only if and to the extent that the data subject has given consent to the processing of subject or her personal data for one or more specific purposes., Processing shall be lawful only if and to the extent that processing is necessary for the performance of a contract to which the data subject is party or in order to take steps at the request of the data subject prior to entering into a contract., Processing shall be lawful only if and to the extent that processing is necessary for compliance with a legal obligation to which the controller is subject., Processing shall be lawful only if and to the extent that processing is necessary in order to protect the vital interests of the data subject or of another natural person., Processing shall be lawful only if and to the extent that processing is necessary for the performance of a task carried out in the public interest or in the exercise of official authority vested in the controller ., Processing shall be lawful only if and to the extent that processing is necessary for the purposes of the legitimate interests pursued by the controller or by a third party, except where such interests are overridden by the interests or fundamental rights and freedoms of the data subject which require protection of personal data, in particular where the data subject is a child., Point (f) of the first subparagraph shall not apply to processing carried out by public authorities in the performance of child tasks., Member States may maintain or introduce more specific provisions to adapt the application of the rules of this Regulation with regard to processing for compliance with points (c) and (e) of paragraph 1 by determining more precisely specific requirements for the processing and other measures to ensure lawful and fair processing including for other specific processing situations as provided for in Chapter IX., The basis for the processing referred to in point (c) and (e) of paragraph 1 shall be laid ..."
1,Processor,"Where processing is to be carried out on behalf of a controller, the controller shall use only processors providing sufficient guarantees to implement appropriate technical and organisational measures in such a manner that processing will meet the requirements of this Regulation and ensure the protection of the rights of the data subject.\n\nThe processor shall not engage another processor without prior specific or general written authorisation of the controller. \n\nIn the case of general written authorisation, the processor shall inform the controller of any intended changes concerning the addition or replacement of other processors, thereby giving the controller the opportunity to object to such changes. \n\nProcessing by a processor shall be governed by a contract or other legal act under Union or Member State law, that is binding on the processor with regard to the controller and that sets out the subject-matter and duration of the processing, the nature and purpose of the processing, the type of personal data and categories of data subjects and the obligations and rights of the controller. \n\nThat contract or other legal act shall stipulate, in particular, that the processor processes the personal data only on documented instructions from the controller, including with regard to transfers of personal data to a third country or an international organisation, unless required to do so by Union or Member State law to which the processor is subject. \n\nin such a case, the processor shall inform the controller of that legal requirement before processing, unless that law prohibits such information on important grounds of public interest. \n\nThat contract or other legal act shall stipulate, in particular, that the processor ensures that persons authorised to process the personal data have committed themselves to confidentiality or are under an appropriate statutory obligation of confidentiality. \n\nThat contract or other legal act shall stipulate, in particul...","Where processing is to be carried out on behalf of a controller, the controller shall use only processors providing sufficient guarantees to implement appropriate technical and organisational measures in such a manner that processing will meet the requirements of this Regulation and ensure the protection of the rights of the data subject.The processor shall not engage another processor without prior specific or general written authorisation of the controller . In the case of general written authorisation, the processor shall inform the controller of any intended changes concerning the addition or replacement of other processors, thereby giving the controller the opportunity to object to such changes. Processing by a processor shall be governed by a contract or other legal act under Union or Member State law, that is binding on the processor with regard to the controller and that sets out the subject-matter and duration of the processing, the nature and purpose of the processing , the type of personal data and categories of data subjects and the obligations and rights of the controller . That contract or other legal act shall stipulate, in particular, that the processor processes the personal data only on documented instructions from the controller , including with regard to transfers of personal data to a third country or an international organisation, unless required to do so by Union or Member State law to which the processor is subject. in such a case, the processor shall inform the controller of that legal requirement before processing, unless that law prohibits such information on important grounds of public interest. That contract or other legal act shall stipulate, in particular, that the processor ensures that persons authorised to process the personal data have committed persons to confidentiality or are under an appropriate statutory obligation of confidentiality. That contract or other legal act shall stipulate, in particular, that the processor take...","[Where processing is to be carried out on behalf of a controller, the controller shall use only processors providing sufficient guarantees to implement appropriate technical and organisational measures in such a manner that processing will meet the requirements of this Regulation and ensure the protection of the rights of the data subject., The processor shall not engage another processor without prior specific or general written authorisation of the controller ., In the case of general written authorisation, the processor shall inform the controller of any intended changes concerning the addition or replacement of other processors, thereby giving the controller the opportunity to object to such changes., Processing by a processor shall be governed by a contract or other legal act under Union or Member State law, that is binding on the processor with regard to the controller and that sets out the subject-matter and duration of the processing, the nature and purpose of the processing , the type of personal data and categories of data subjects and the obligations and rights of the controller ., That contract or other legal act shall stipulate, in particular, that the processor processes the personal data only on documented instructions from the controller , including with regard to transfers of personal data to a third country or an international organisation, unless required to do so by Union or Member State law to which the processor is subject., in such a case, the processor shall inform the controller of that legal requirement before processing, unless that law prohibits such information on important grounds of public interest. That contract or other legal act shall stipulate, in particular, that the processor ensures that persons authorised to process the personal data have committed persons to confidentiality or are under an appropriate statutory obligation of confidentiality., That contract or other legal act shall stipulate, in particular, that the proces..."
2,Binding corporate rules,"The competent supervisory authority shall approve binding corporate rules in accordance with the consistency mechanism set out in Article 63, provided that they are legally binding and apply to and are enforced by every member concerned of the group of undertakings, or group of enterprises engaged in a joint economic activity, including their employees. \n\nThe competent supervisory authority shall approve binding corporate rules in accordance with the consistency mechanism set out in Article 63, provided that they expressly confer enforceable rights on data subjects with regard to the processing of their personal data. \n\nThe competent supervisory authority shall approve binding corporate rules in accordance with the consistency mechanism set out in Article 63, provided that they fulfil the requirements laid down in paragraph 2. \n\nThe binding corporate rules referred to in paragraph 1 shall specify at least the structure and contact details of the group of undertakings, or group of enterprises engaged in a joint economic activity and of each of its members. \n\nThe binding corporate rules referred to in paragraph 1 shall specify at least the data transfers or set of transfers, including the categories of personal data, the type of processing and its purposes, the type of data subjects affected and the identification of the third country or countries in question. \n\nThe binding corporate rules referred to in paragraph 1 shall specify at least their legally binding nature, both internally and externally. \n\nThe binding corporate rules referred to in paragraph 1 shall specify at least the application of the general data protection principles, in particular purpose limitation, data minimisation, limited storage periods, data quality, data protection by design and by default, legal basis for processing, processing of special categories of personal data, measures to ensure data security, and the requirements in respect of onward transfers to bodies not bound by ...","The competent supervisory authority shall approve binding corporate rules in accordance with the consistency mechanism set out in Article 63, provided that rules are legally binding and apply to and are enforced by every member concerned of the group of undertakings, or group of enterprises engaged in a joint economic activity, including rules employees. The competent supervisory authority shall approve binding corporate rules in accordance with the consistency mechanism set out in Article 63, provided that rules expressly confer enforceable rights on data subjects with regard to the processing of rules personal data. The competent supervisory authority shall approve binding corporate rules in accordance with the consistency mechanism set out in Article 63, provided that rules fulfil the requirements laid down in paragraph 2. The binding corporate rules referred to in paragraph 1 shall specify at least the structure and contact details of the group of undertakings, or group of enterprises engaged in a joint economic activity and of each of group members. The binding corporate rules referred to in paragraph 1 shall specify at least the data transfers or set of transfers, including the categories of personal data, the type of processing and type purposes, the type of data subjects affected and the identification of the third country or countries in question. The binding corporate rules referred to in paragraph 1 shall specify at least rules legally binding nature, both internally and externally. The binding corporate rules referred to in paragraph 1 shall specify at least the application of the general data protection principles, in particular purpose limitation, data minimisation, limited storage periods, data quality, data protection by design and by default, legal basis for processing, processing of special categories of personal data, measures to ensure data security, and the requirements in respect of onward transfers to bodies not bound by the binding corpor...","[The competent supervisory authority shall approve binding corporate rules in accordance with the consistency mechanism set out in Article 63, provided that rules are legally binding and apply to and are enforced by every member concerned of the group of undertakings, or group of enterprises engaged in a joint economic activity, including rules employees., The competent supervisory authority shall approve binding corporate rules in accordance with the consistency mechanism set out in Article 63, provided that rules expressly confer enforceable rights on data subjects with regard to the processing of rules personal data., The competent supervisory authority shall approve binding corporate rules in accordance with the consistency mechanism set out in Article 63, provided that rules fulfil the requirements laid down in paragraph 2., The binding corporate rules referred to in paragraph 1 shall specify at least the structure and contact details of the group of undertakings, or group of enterprises engaged in a joint economic activity and of each of group members., The binding corporate rules referred to in paragraph 1 shall specify at least the data transfers or set of transfers, including the categories of personal data, the type of processing and type purposes, the type of data subjects affected and the identification of the third country or countries in question., The binding corporate rules referred to in paragraph 1 shall specify at least rules legally binding nature, both internally and externally., The binding corporate rules referred to in paragraph 1 shall specify at least the application of the general data protection principles, in particular purpose limitation, data minimisation, limited storage periods, data quality, data protection by design and by default, legal basis for processing, processing of special categories of personal data, measures to ensure data security, and the requirements in respect of onward transfers to bodies not bound by the binding..."
3,Processing under the authority of the controller or processor,"The processor and any person acting under the authority of the controller or of the processor, who has access to personal data, shall not process those data except on instructions from the controller, unless required to do so by Union or Member State law.\n","The processor and any person acting under the authority of the controller or of the processor , who has access to personal data, shall not process those data except on instructions from the controller , unless required to do so by Union or Member State law.","[The processor and any person acting under the authority of the controller or of the processor , who has access to personal data, shall not process those data except on instructions from the controller , unless required to do so by Union or Member State law.]"
4,Data protection by design and by default,"Taking into account the state of the art, the cost of implementation and the nature, scope, context and purposes of processing as well as the risks of varying likelihood and severity for rights and freedoms of natural persons posed by the processing, the controller shall, both at the time of the determination of the means for processing and at the time of the processing itself, implement appropriate technical and organisational measures, such as pseudonymisation, which are designed to implement data-protection principles, such as data minimisation, in an effective manner and to integrate the necessary safeguards into the processing in order to meet the requirements of this Regulation and protect the rights of data subjects. \n\nThe controller shall implement appropriate technical and organisational measures for ensuring that, by default, only personal data which are necessary for each specific purpose of the processing are processed. \n\nThat obligation applies to the amount of personal data collected, the extent of their processing, the period of their storage and their accessibility. \n\nIn particular, such measures shall ensure that by default personal data are not made accessible without the individual's intervention to an indefinite number of natural persons. \n\nAn approved certification mechanism pursuant to Article 42 may be used as an element to demonstrate compliance with the requirements set out in paragraphs 1 and 2 of this Article. \n","Taking into account the state of the art, the cost of implementation and the nature, scope, context and purposes of processing as well as the risks of varying likelihood and severity for rights and freedoms of natural persons posed by the processing, the controller shall, both at the time of the determination of the means for processing and at the time of the processing processing , implement appropriate technical and organisational measures, such as pseudonymisation, which are designed to implement data-protection principles, such as data minimisation, in an effective manner and to integrate the necessary safeguards into the processing in order to meet the requirements of this Regulation and protect the rights of data subjects. The controller shall implement appropriate technical and organisational measures for ensuring that, by default, only personal data which are necessary for each specific purpose of the processing are processed. That obligation applies to the amount of personal data collected, the extent of data processing, the period of data storage and data accessibility. In particular, such measures shall ensure that by default personal data are not made accessible without the individual's intervention to an indefinite number of natural persons. An approved certification mechanism pursuant to Article 42 may be used as an element to demonstrate compliance with the requirements set out in paragraphs 1 and 2 of this Article.","[Taking into account the state of the art, the cost of implementation and the nature, scope, context and purposes of processing as well as the risks of varying likelihood and severity for rights and freedoms of natural persons posed by the processing, the controller shall, both at the time of the determination of the means for processing and at the time of the processing processing , implement appropriate technical and organisational measures, such as pseudonymisation, which are designed to implement data-protection principles, such as data minimisation, in an effective manner and to integrate the necessary safeguards into the processing in order to meet the requirements of this Regulation and protect the rights of data subjects., The controller shall implement appropriate technical and organisational measures for ensuring that, by default, only personal data which are necessary for each specific purpose of the processing are processed., That obligation applies to the amount of personal data collected, the extent of data processing, the period of data storage and data accessibility., In particular, such measures shall ensure that by default personal data are not made accessible without the individual's intervention to an indefinite number of natural persons., An approved certification mechanism pursuant to Article 42 may be used as an element to demonstrate compliance with the requirements set out in paragraphs 1 and 2 of this Article.]"


In [19]:
df_new = df.explode(['reg_sent'])
df_new.head()

Unnamed: 0,reg_title,reg_text,reg_text_cleaned_2,reg_sent
0,Lawfulness of processing,"Processing shall be lawful only if and to the extent that the data subject has given consent to the processing of his or her personal data for one or more specific purposes. \n\nProcessing shall be lawful only if and to the extent that processing is necessary for the performance of a contract to which the data subject is party or in order to take steps at the request of the data subject prior to entering into a contract. \n\nProcessing shall be lawful only if and to the extent that processing is necessary for compliance with a legal obligation to which the controller is subject. \n\nProcessing shall be lawful only if and to the extent that processing is necessary in order to protect the vital interests of the data subject or of another natural person. \n\nProcessing shall be lawful only if and to the extent that processing is necessary for the performance of a task carried out in the public interest or in the exercise of official authority vested in the controller. \n \nProcessing shall be lawful only if and to the extent that processing is necessary for the purposes of the legitimate interests pursued by the controller or by a third party, except where such interests are overridden by the interests or fundamental rights and freedoms of the data subject which require protection of personal data, in particular where the data subject is a child. \n\nPoint (f) of the first subparagraph shall not apply to processing carried out by public authorities in the performance of their tasks. \n\nMember States may maintain or introduce more specific provisions to adapt the application of the rules of this Regulation with regard to processing for compliance with points (c) and (e) of paragraph 1 by determining more precisely specific requirements for the processing and other measures to ensure lawful and fair processing including for other specific processing situations as provided for in Chapter IX. \n\nThe basis for the processing referred to in point (c) and (e) of paragra...","Processing shall be lawful only if and to the extent that the data subject has given consent to the processing of subject or her personal data for one or more specific purposes. Processing shall be lawful only if and to the extent that processing is necessary for the performance of a contract to which the data subject is party or in order to take steps at the request of the data subject prior to entering into a contract. Processing shall be lawful only if and to the extent that processing is necessary for compliance with a legal obligation to which the controller is subject. Processing shall be lawful only if and to the extent that processing is necessary in order to protect the vital interests of the data subject or of another natural person. Processing shall be lawful only if and to the extent that processing is necessary for the performance of a task carried out in the public interest or in the exercise of official authority vested in the controller . Processing shall be lawful only if and to the extent that processing is necessary for the purposes of the legitimate interests pursued by the controller or by a third party, except where such interests are overridden by the interests or fundamental rights and freedoms of the data subject which require protection of personal data, in particular where the data subject is a child. Point (f) of the first subparagraph shall not apply to processing carried out by public authorities in the performance of child tasks. Member States may maintain or introduce more specific provisions to adapt the application of the rules of this Regulation with regard to processing for compliance with points (c) and (e) of paragraph 1 by determining more precisely specific requirements for the processing and other measures to ensure lawful and fair processing including for other specific processing situations as provided for in Chapter IX. The basis for the processing referred to in point (c) and (e) of paragraph 1 shall be laid down by U...",Processing shall be lawful only if and to the extent that the data subject has given consent to the processing of subject or her personal data for one or more specific purposes.
0,Lawfulness of processing,"Processing shall be lawful only if and to the extent that the data subject has given consent to the processing of his or her personal data for one or more specific purposes. \n\nProcessing shall be lawful only if and to the extent that processing is necessary for the performance of a contract to which the data subject is party or in order to take steps at the request of the data subject prior to entering into a contract. \n\nProcessing shall be lawful only if and to the extent that processing is necessary for compliance with a legal obligation to which the controller is subject. \n\nProcessing shall be lawful only if and to the extent that processing is necessary in order to protect the vital interests of the data subject or of another natural person. \n\nProcessing shall be lawful only if and to the extent that processing is necessary for the performance of a task carried out in the public interest or in the exercise of official authority vested in the controller. \n \nProcessing shall be lawful only if and to the extent that processing is necessary for the purposes of the legitimate interests pursued by the controller or by a third party, except where such interests are overridden by the interests or fundamental rights and freedoms of the data subject which require protection of personal data, in particular where the data subject is a child. \n\nPoint (f) of the first subparagraph shall not apply to processing carried out by public authorities in the performance of their tasks. \n\nMember States may maintain or introduce more specific provisions to adapt the application of the rules of this Regulation with regard to processing for compliance with points (c) and (e) of paragraph 1 by determining more precisely specific requirements for the processing and other measures to ensure lawful and fair processing including for other specific processing situations as provided for in Chapter IX. \n\nThe basis for the processing referred to in point (c) and (e) of paragra...","Processing shall be lawful only if and to the extent that the data subject has given consent to the processing of subject or her personal data for one or more specific purposes. Processing shall be lawful only if and to the extent that processing is necessary for the performance of a contract to which the data subject is party or in order to take steps at the request of the data subject prior to entering into a contract. Processing shall be lawful only if and to the extent that processing is necessary for compliance with a legal obligation to which the controller is subject. Processing shall be lawful only if and to the extent that processing is necessary in order to protect the vital interests of the data subject or of another natural person. Processing shall be lawful only if and to the extent that processing is necessary for the performance of a task carried out in the public interest or in the exercise of official authority vested in the controller . Processing shall be lawful only if and to the extent that processing is necessary for the purposes of the legitimate interests pursued by the controller or by a third party, except where such interests are overridden by the interests or fundamental rights and freedoms of the data subject which require protection of personal data, in particular where the data subject is a child. Point (f) of the first subparagraph shall not apply to processing carried out by public authorities in the performance of child tasks. Member States may maintain or introduce more specific provisions to adapt the application of the rules of this Regulation with regard to processing for compliance with points (c) and (e) of paragraph 1 by determining more precisely specific requirements for the processing and other measures to ensure lawful and fair processing including for other specific processing situations as provided for in Chapter IX. The basis for the processing referred to in point (c) and (e) of paragraph 1 shall be laid down by U...",Processing shall be lawful only if and to the extent that processing is necessary for the performance of a contract to which the data subject is party or in order to take steps at the request of the data subject prior to entering into a contract.
0,Lawfulness of processing,"Processing shall be lawful only if and to the extent that the data subject has given consent to the processing of his or her personal data for one or more specific purposes. \n\nProcessing shall be lawful only if and to the extent that processing is necessary for the performance of a contract to which the data subject is party or in order to take steps at the request of the data subject prior to entering into a contract. \n\nProcessing shall be lawful only if and to the extent that processing is necessary for compliance with a legal obligation to which the controller is subject. \n\nProcessing shall be lawful only if and to the extent that processing is necessary in order to protect the vital interests of the data subject or of another natural person. \n\nProcessing shall be lawful only if and to the extent that processing is necessary for the performance of a task carried out in the public interest or in the exercise of official authority vested in the controller. \n \nProcessing shall be lawful only if and to the extent that processing is necessary for the purposes of the legitimate interests pursued by the controller or by a third party, except where such interests are overridden by the interests or fundamental rights and freedoms of the data subject which require protection of personal data, in particular where the data subject is a child. \n\nPoint (f) of the first subparagraph shall not apply to processing carried out by public authorities in the performance of their tasks. \n\nMember States may maintain or introduce more specific provisions to adapt the application of the rules of this Regulation with regard to processing for compliance with points (c) and (e) of paragraph 1 by determining more precisely specific requirements for the processing and other measures to ensure lawful and fair processing including for other specific processing situations as provided for in Chapter IX. \n\nThe basis for the processing referred to in point (c) and (e) of paragra...","Processing shall be lawful only if and to the extent that the data subject has given consent to the processing of subject or her personal data for one or more specific purposes. Processing shall be lawful only if and to the extent that processing is necessary for the performance of a contract to which the data subject is party or in order to take steps at the request of the data subject prior to entering into a contract. Processing shall be lawful only if and to the extent that processing is necessary for compliance with a legal obligation to which the controller is subject. Processing shall be lawful only if and to the extent that processing is necessary in order to protect the vital interests of the data subject or of another natural person. Processing shall be lawful only if and to the extent that processing is necessary for the performance of a task carried out in the public interest or in the exercise of official authority vested in the controller . Processing shall be lawful only if and to the extent that processing is necessary for the purposes of the legitimate interests pursued by the controller or by a third party, except where such interests are overridden by the interests or fundamental rights and freedoms of the data subject which require protection of personal data, in particular where the data subject is a child. Point (f) of the first subparagraph shall not apply to processing carried out by public authorities in the performance of child tasks. Member States may maintain or introduce more specific provisions to adapt the application of the rules of this Regulation with regard to processing for compliance with points (c) and (e) of paragraph 1 by determining more precisely specific requirements for the processing and other measures to ensure lawful and fair processing including for other specific processing situations as provided for in Chapter IX. The basis for the processing referred to in point (c) and (e) of paragraph 1 shall be laid down by U...",Processing shall be lawful only if and to the extent that processing is necessary for compliance with a legal obligation to which the controller is subject.
0,Lawfulness of processing,"Processing shall be lawful only if and to the extent that the data subject has given consent to the processing of his or her personal data for one or more specific purposes. \n\nProcessing shall be lawful only if and to the extent that processing is necessary for the performance of a contract to which the data subject is party or in order to take steps at the request of the data subject prior to entering into a contract. \n\nProcessing shall be lawful only if and to the extent that processing is necessary for compliance with a legal obligation to which the controller is subject. \n\nProcessing shall be lawful only if and to the extent that processing is necessary in order to protect the vital interests of the data subject or of another natural person. \n\nProcessing shall be lawful only if and to the extent that processing is necessary for the performance of a task carried out in the public interest or in the exercise of official authority vested in the controller. \n \nProcessing shall be lawful only if and to the extent that processing is necessary for the purposes of the legitimate interests pursued by the controller or by a third party, except where such interests are overridden by the interests or fundamental rights and freedoms of the data subject which require protection of personal data, in particular where the data subject is a child. \n\nPoint (f) of the first subparagraph shall not apply to processing carried out by public authorities in the performance of their tasks. \n\nMember States may maintain or introduce more specific provisions to adapt the application of the rules of this Regulation with regard to processing for compliance with points (c) and (e) of paragraph 1 by determining more precisely specific requirements for the processing and other measures to ensure lawful and fair processing including for other specific processing situations as provided for in Chapter IX. \n\nThe basis for the processing referred to in point (c) and (e) of paragra...","Processing shall be lawful only if and to the extent that the data subject has given consent to the processing of subject or her personal data for one or more specific purposes. Processing shall be lawful only if and to the extent that processing is necessary for the performance of a contract to which the data subject is party or in order to take steps at the request of the data subject prior to entering into a contract. Processing shall be lawful only if and to the extent that processing is necessary for compliance with a legal obligation to which the controller is subject. Processing shall be lawful only if and to the extent that processing is necessary in order to protect the vital interests of the data subject or of another natural person. Processing shall be lawful only if and to the extent that processing is necessary for the performance of a task carried out in the public interest or in the exercise of official authority vested in the controller . Processing shall be lawful only if and to the extent that processing is necessary for the purposes of the legitimate interests pursued by the controller or by a third party, except where such interests are overridden by the interests or fundamental rights and freedoms of the data subject which require protection of personal data, in particular where the data subject is a child. Point (f) of the first subparagraph shall not apply to processing carried out by public authorities in the performance of child tasks. Member States may maintain or introduce more specific provisions to adapt the application of the rules of this Regulation with regard to processing for compliance with points (c) and (e) of paragraph 1 by determining more precisely specific requirements for the processing and other measures to ensure lawful and fair processing including for other specific processing situations as provided for in Chapter IX. The basis for the processing referred to in point (c) and (e) of paragraph 1 shall be laid down by U...",Processing shall be lawful only if and to the extent that processing is necessary in order to protect the vital interests of the data subject or of another natural person.
0,Lawfulness of processing,"Processing shall be lawful only if and to the extent that the data subject has given consent to the processing of his or her personal data for one or more specific purposes. \n\nProcessing shall be lawful only if and to the extent that processing is necessary for the performance of a contract to which the data subject is party or in order to take steps at the request of the data subject prior to entering into a contract. \n\nProcessing shall be lawful only if and to the extent that processing is necessary for compliance with a legal obligation to which the controller is subject. \n\nProcessing shall be lawful only if and to the extent that processing is necessary in order to protect the vital interests of the data subject or of another natural person. \n\nProcessing shall be lawful only if and to the extent that processing is necessary for the performance of a task carried out in the public interest or in the exercise of official authority vested in the controller. \n \nProcessing shall be lawful only if and to the extent that processing is necessary for the purposes of the legitimate interests pursued by the controller or by a third party, except where such interests are overridden by the interests or fundamental rights and freedoms of the data subject which require protection of personal data, in particular where the data subject is a child. \n\nPoint (f) of the first subparagraph shall not apply to processing carried out by public authorities in the performance of their tasks. \n\nMember States may maintain or introduce more specific provisions to adapt the application of the rules of this Regulation with regard to processing for compliance with points (c) and (e) of paragraph 1 by determining more precisely specific requirements for the processing and other measures to ensure lawful and fair processing including for other specific processing situations as provided for in Chapter IX. \n\nThe basis for the processing referred to in point (c) and (e) of paragra...","Processing shall be lawful only if and to the extent that the data subject has given consent to the processing of subject or her personal data for one or more specific purposes. Processing shall be lawful only if and to the extent that processing is necessary for the performance of a contract to which the data subject is party or in order to take steps at the request of the data subject prior to entering into a contract. Processing shall be lawful only if and to the extent that processing is necessary for compliance with a legal obligation to which the controller is subject. Processing shall be lawful only if and to the extent that processing is necessary in order to protect the vital interests of the data subject or of another natural person. Processing shall be lawful only if and to the extent that processing is necessary for the performance of a task carried out in the public interest or in the exercise of official authority vested in the controller . Processing shall be lawful only if and to the extent that processing is necessary for the purposes of the legitimate interests pursued by the controller or by a third party, except where such interests are overridden by the interests or fundamental rights and freedoms of the data subject which require protection of personal data, in particular where the data subject is a child. Point (f) of the first subparagraph shall not apply to processing carried out by public authorities in the performance of child tasks. Member States may maintain or introduce more specific provisions to adapt the application of the rules of this Regulation with regard to processing for compliance with points (c) and (e) of paragraph 1 by determining more precisely specific requirements for the processing and other measures to ensure lawful and fair processing including for other specific processing situations as provided for in Chapter IX. The basis for the processing referred to in point (c) and (e) of paragraph 1 shall be laid down by U...",Processing shall be lawful only if and to the extent that processing is necessary for the performance of a task carried out in the public interest or in the exercise of official authority vested in the controller .


In [20]:
df_new = df_new.drop(['reg_text','reg_text_cleaned_2'], axis=1)
df_new.head()

Unnamed: 0,reg_title,reg_sent
0,Lawfulness of processing,Processing shall be lawful only if and to the extent that the data subject has given consent to the processing of subject or her personal data for one or more specific purposes.
0,Lawfulness of processing,Processing shall be lawful only if and to the extent that processing is necessary for the performance of a contract to which the data subject is party or in order to take steps at the request of the data subject prior to entering into a contract.
0,Lawfulness of processing,Processing shall be lawful only if and to the extent that processing is necessary for compliance with a legal obligation to which the controller is subject.
0,Lawfulness of processing,Processing shall be lawful only if and to the extent that processing is necessary in order to protect the vital interests of the data subject or of another natural person.
0,Lawfulness of processing,Processing shall be lawful only if and to the extent that processing is necessary for the performance of a task carried out in the public interest or in the exercise of official authority vested in the controller .


In [21]:
def keep_only_signalword_sentences(text):
    ''' only keep those sentences that contain at least one signalword'''
    doc = nlp(text) 
    i = 0
    for token in doc: 
        if (token.text in signalwords):
            i = 1
            break
        else:
            continue
    return i

In [22]:
df_new['contains_signalword'] = df_new.apply(lambda row : keep_only_signalword_sentences(row['reg_sent']), axis = 1)

In [23]:
df_new = df_new[df_new.contains_signalword != 0]
df_new = df_new.drop(['contains_signalword'], axis=1)


In [24]:
df_new.head()

Unnamed: 0,reg_title,reg_sent
0,Lawfulness of processing,Processing shall be lawful only if and to the extent that the data subject has given consent to the processing of subject or her personal data for one or more specific purposes.
0,Lawfulness of processing,Processing shall be lawful only if and to the extent that processing is necessary for the performance of a contract to which the data subject is party or in order to take steps at the request of the data subject prior to entering into a contract.
0,Lawfulness of processing,Processing shall be lawful only if and to the extent that processing is necessary for compliance with a legal obligation to which the controller is subject.
0,Lawfulness of processing,Processing shall be lawful only if and to the extent that processing is necessary in order to protect the vital interests of the data subject or of another natural person.
0,Lawfulness of processing,Processing shall be lawful only if and to the extent that processing is necessary for the performance of a task carried out in the public interest or in the exercise of official authority vested in the controller .


In [25]:
# extract sent keyphrase with RAKE
import pandas as pd
from rake_nltk import Rake
import re
import os

In [26]:
def RAKE_Keyword_Extraction(text, stop_word_path, threshold):

    # our extracted keywords, min 1, max 5.
    keywords = []
    phrases = []
    phrases1 = []
    phrases2 = []
    max_len = 5
    min_len = 1
    # uncustomized stopwordlist
    stop_words = []
    with open(stop_word_path, 'r') as f:
        for w in f.readlines():
            stop_words.append(w.strip())
        f.close()
    # initialize the Rake keyword extractor
    r = Rake(stopwords=stop_words, max_length=max_len, min_length=min_len)
    #text = re.sub('[^a-zA-Z]', ' ', text)
    r.extract_keywords_from_sentences([text])
    # rank the extracted keywords
    phrases = r.get_ranked_phrases_with_scores()
    # exclude keywords, with scores lower than the threshold
    phrases2.extend([p[1] for p in phrases if len(p[1]) > 1 and p[0] > threshold and p[1] not in phrases2])
    if len(phrases2) >= 5:  # maximal 5 keywords
        keywords.append(phrases2[:5])
    elif 0 < len(phrases2) < 5:  # take the rest
        keywords.append(phrases2)
    else:
        phrases1 = r.get_ranked_phrases()
        if len(phrases) >= 5:
            keywords.append(phrases1[:5])
        else:
            keywords.append(phrases1)
    keyword_list = ' '.join(map(str, keywords))
    return keyword_list

In [27]:
#output of function = one concated keyword string
df_new['keywords_sent'] = df_new.apply(lambda row : RAKE_Keyword_Extraction(row['reg_sent'], GDPR_STOPWORDS_RAKE, 3), axis = 1)
df_new.head()

Unnamed: 0,reg_title,reg_sent,keywords_sent
0,Lawfulness of processing,Processing shall be lawful only if and to the extent that the data subject has given consent to the processing of subject or her personal data for one or more specific purposes.,"['specific purposes', 'personal data', 'given consent', 'data subject']"
0,Lawfulness of processing,Processing shall be lawful only if and to the extent that processing is necessary for the performance of a contract to which the data subject is party or in order to take steps at the request of the data subject prior to entering into a contract.,"['data subject prior', 'data subject', 'take steps']"
0,Lawfulness of processing,Processing shall be lawful only if and to the extent that processing is necessary for compliance with a legal obligation to which the controller is subject.,['legal obligation']
0,Lawfulness of processing,Processing shall be lawful only if and to the extent that processing is necessary in order to protect the vital interests of the data subject or of another natural person.,"['another natural person', 'vital interests', 'data subject']"
0,Lawfulness of processing,Processing shall be lawful only if and to the extent that processing is necessary for the performance of a task carried out in the public interest or in the exercise of official authority vested in the controller .,"['official authority vested', 'task carried', 'public interest']"


In [28]:
df_new['keywords_title'] = df_new.apply(lambda row : RAKE_Keyword_Extraction(row['reg_title'], GDPR_STOPWORDS_RAKE, 3), axis = 1)
df_new.head()

Unnamed: 0,reg_title,reg_sent,keywords_sent,keywords_title
0,Lawfulness of processing,Processing shall be lawful only if and to the extent that the data subject has given consent to the processing of subject or her personal data for one or more specific purposes.,"['specific purposes', 'personal data', 'given consent', 'data subject']","['processing', 'lawfulness']"
0,Lawfulness of processing,Processing shall be lawful only if and to the extent that processing is necessary for the performance of a contract to which the data subject is party or in order to take steps at the request of the data subject prior to entering into a contract.,"['data subject prior', 'data subject', 'take steps']","['processing', 'lawfulness']"
0,Lawfulness of processing,Processing shall be lawful only if and to the extent that processing is necessary for compliance with a legal obligation to which the controller is subject.,['legal obligation'],"['processing', 'lawfulness']"
0,Lawfulness of processing,Processing shall be lawful only if and to the extent that processing is necessary in order to protect the vital interests of the data subject or of another natural person.,"['another natural person', 'vital interests', 'data subject']","['processing', 'lawfulness']"
0,Lawfulness of processing,Processing shall be lawful only if and to the extent that processing is necessary for the performance of a task carried out in the public interest or in the exercise of official authority vested in the controller .,"['official authority vested', 'task carried', 'public interest']","['processing', 'lawfulness']"


In [29]:
def join_keywords(a,b):
    string = re.sub(r'[^\w\s]','',a)
    string2 = re.sub(r'[^\w\s]','',b)
    c = string + " " + string2
    return c

In [30]:
import re
df_new['reg_kw_total'] = df_new.apply(lambda row : join_keywords(row['keywords_sent'], row['keywords_title']), axis =1)
df_new.head()

Unnamed: 0,reg_title,reg_sent,keywords_sent,keywords_title,reg_kw_total
0,Lawfulness of processing,Processing shall be lawful only if and to the extent that the data subject has given consent to the processing of subject or her personal data for one or more specific purposes.,"['specific purposes', 'personal data', 'given consent', 'data subject']","['processing', 'lawfulness']",specific purposes personal data given consent data subject processing lawfulness
0,Lawfulness of processing,Processing shall be lawful only if and to the extent that processing is necessary for the performance of a contract to which the data subject is party or in order to take steps at the request of the data subject prior to entering into a contract.,"['data subject prior', 'data subject', 'take steps']","['processing', 'lawfulness']",data subject prior data subject take steps processing lawfulness
0,Lawfulness of processing,Processing shall be lawful only if and to the extent that processing is necessary for compliance with a legal obligation to which the controller is subject.,['legal obligation'],"['processing', 'lawfulness']",legal obligation processing lawfulness
0,Lawfulness of processing,Processing shall be lawful only if and to the extent that processing is necessary in order to protect the vital interests of the data subject or of another natural person.,"['another natural person', 'vital interests', 'data subject']","['processing', 'lawfulness']",another natural person vital interests data subject processing lawfulness
0,Lawfulness of processing,Processing shall be lawful only if and to the extent that processing is necessary for the performance of a task carried out in the public interest or in the exercise of official authority vested in the controller .,"['official authority vested', 'task carried', 'public interest']","['processing', 'lawfulness']",official authority vested task carried public interest processing lawfulness


In [31]:
# save preprocessed reg to excel
pd.DataFrame(df_new).to_excel(join(INTERMEDIATE_DIRECTORY, "gdpr_reg_preprocessed_optiona.xlsx"))  