In [1]:
from nuclia import sdk
from nuclia_models.worker.tasks import TaskName, ApplyOptions, DataAugmentation
from nuclia_models.worker.proto import (
	GraphOperation,
	EntityDefinition,
	EntityExample,
	RelationExample,
	LLMConfig,
	Filter,
	ApplyTo,
	Operation,
)




In [2]:
# Run the `nuclia auth login` CLI command from the notebook via IPython's `!` shell escape.
# NOTE(review): presumably this must succeed before the KB client below can make calls — confirm.
!nuclia auth login


User: <redacted> <redacted@example.com>
Type: USER



Logged in!


In [3]:
# Knowledge Box client; the cells below use `kb.task` to start, list and inspect tasks.
# NOTE(review): presumably relies on the session created by `nuclia auth login` — confirm.
kb = sdk.NucliaKB()

In [4]:
# Entity schema for the privacy-policy knowledge graph.
# Every (label, description) pair below is turned into one EntityDefinition;
# the comment above each pair lists typical mentions for that label.
entity_defs = [
    EntityDefinition(label=entity_label, description=entity_description)
    for entity_label, entity_description in (
        # e.g. "23andMe", "Fiverr", "Groupon", "Keep App", "TickTick", "Viber", "PeopleFun"
        (
            "COMPANY_SERVICE",
            "The legal entity, brand name, or specific service that the privacy policy pertains to and which is responsible for data processing.",
        ),
        # e.g. "Registration Information", "Genetic Information", "IP Address", "Payment Information",
        # "Location Data", "Cookie Data", "User Content", "Usage Information"
        (
            "DATA_CATEGORY",
            "A specific type or category of personal or non-personal information that is collected, processed, stored, or shared by the COMPANY_SERVICE.",
        ),
        # e.g. "Genetic Information", "Health-related information", "racial and ethnic origin"
        (
            "SENSITIVE_DATA_CATEGORY",
            "A sub-category of DATA_CATEGORY that refers to legally recognized sensitive personal information requiring special protection, such as health data, genetic data, racial or ethnic origin, or sexual orientation.",
        ),
        # e.g. "Provide Services", "Improve Services", "Marketing", "Analytics", "Security", "Legal Compliance"
        (
            "PURPOSE_OF_PROCESSING",
            "The specific reason, goal, or objective for which a DATA_CATEGORY is collected, used, or otherwise processed by the COMPANY_SERVICE.",
        ),
        # Named THIRD_PARTY_TYPE (rather than THIRD_PARTY) because it captures a kind of party.
        # e.g. "Service Providers", "Payment Processors", "Advertising Partners", "Analytics Providers",
        # "Social Media Platforms", "Law Enforcement"
        (
            "THIRD_PARTY_TYPE",
            "A type of external entity, company, or service provider (distinct from the user and COMPANY_SERVICE) that may receive or process data, or is involved in service delivery.",
        ),
        # e.g. "Right to Access", "Right to Erasure", "Right to Withdraw Consent", "Data Portability",
        # "Opt-out of Marketing"
        (
            "USER_RIGHT",
            "A specific entitlement or control that an individual user has regarding their personal information as outlined in the privacy policy.",
        ),
        # e.g. "Encryption", "De-identification", "Access Controls", "Firewalls", "SSL Technology"
        (
            "SECURITY_MEASURE",
            "A specific technical, administrative, or physical safeguard implemented by the COMPANY_SERVICE to protect personal information.",
        ),
        # A policy/concept; often phrased as a condition such as "as long as account is active"
        # or "for legal purposes".
        (
            "DATA_RETENTION_POLICY",
            "Statements or principles describing how long, and under what conditions, different types of data are stored by the COMPANY_SERVICE.",
        ),
        # e.g. "Cookies", "Web Beacons", "Log Files", "Google Analytics SDK"
        (
            "TRACKING_TECHNOLOGY",
            "A specific technology or method (e.g., cookies, web beacons, pixels, SDKs) used by the COMPANY_SERVICE or its partners to collect information about users, often related to web/app usage and preferences.",
        ),
        # e.g. "Necessary Cookies", "Performance Cookies", "Functionality Cookies", "Targeting Cookies"
        (
            "COOKIE_TYPE",
            "A specific category or type of cookie used by the service, often distinguished by its purpose (e.g., necessary, performance, functionality, advertising).",
        ),
        # e.g. "United States", "EEA", "California", "Luxembourg"
        (
            "GEOGRAPHIC_AREA",
            "A country, region, state, or specific legal jurisdiction mentioned in the context of data processing, storage, data transfer, or applicable law.",
        ),
        # e.g. "Consent", "Performance of Contract", "Legitimate Interests", "Legal Obligation"
        (
            "LEGAL_BASIS",
            "The lawful justification claimed by the COMPANY_SERVICE for processing personal data under applicable data protection regulations (e.g., GDPR).",
        ),
        # e.g. "GDPR", "CCPA", "COPPA", "Privacy Shield Framework"
        (
            "SPECIFIC_REGULATION",
            "A specific named law, directive, regulation, or legal framework relevant to data privacy and protection mentioned in the policy.",
        ),
        # More specific than a generic "User". e.g. "User ID", "password", "Account Settings"
        (
            "USER_ACCOUNT_INFO",
            "Information pertaining to a user's registered account with the service, such as user ID, password, profile settings, and account status.",
        ),
        # How users or the company can be contacted.
        # e.g. "privacy@company.com", "Support Ticket", "Postal Address"
        (
            "CONTACT_METHOD",
            "A specific method or address for communication, such as an email address, physical address, or support channel.",
        ),
        # Policies often have a dedicated section for this; it frequently mentions COPPA or age limits.
        (
            "CHILDRENS_DATA_POLICY",
            "Statements or rules specifically addressing the collection and processing of personal information from children (e.g., under 13 or 16).",
        ),
        # e.g. "Standard Contractual Clauses (SCCs)", "Binding Corporate Rules (BCRs)", "Privacy Shield"
        (
            "DATA_TRANSFER_MECHANISM",
            "Mechanisms or safeguards mentioned for transferring personal data across international borders, particularly outside of jurisdictions like the EEA.",
        ),
        # References to sections of the privacy policy itself.
        # e.g. "Section 3.h", "Your Choices section"
        (
            "POLICY_SECTION",
            "A named or numbered section within the privacy policy document itself that is referenced.",
        ),
        # Retention periods or notice periods.
        # e.g. "30 days", "2 weeks", "as long as necessary" (the last one may be hard to pin down)
        (
            "DURATION",
            "A specific period of time mentioned, such as for data retention, opt-out effectiveness, or notice before changes.",
        ),
    )
]

In [5]:
# Example entity mentions for the graph operation.
# Each EntityExample gives the mention text (`name`), the entity label it should
# receive (`label`), and a sentence (`example`) in which that mention occurs.
# The trailing "Source" comment names the policy file the sentence was taken from.
# Sentences whose words were fused by text extraction have had their spaces restored,
# and each `name` is spelled the way it appears in its sentence.
entity_examples = [
    # --- COMPANY_SERVICE ---
    EntityExample(
        name="Fiverr",
        label="COMPANY_SERVICE",
        example="At Fiverr we care about your privacy."
    ), # Source: Fiverr.txt
    EntityExample(
        name="23andMe",
        label="COMPANY_SERVICE",
        example="When you purchase our Services or create a 23andMe account and register your kit, we collect Personal Information..."
    ), # Source: 23andMe.txt
    EntityExample(
        name="Groupon",
        label="COMPANY_SERVICE",
        # Uses a sentence that actually names the company, so the mention is visible in the example.
        example="Communicate and provide additional information that may be of interest to you about Groupon and our Business Partners, sometimes by combining your information..."
    ), # Source: Groupon.txt
    EntityExample(
        name="Keep App",
        label="COMPANY_SERVICE",
        example="We collect information from you when you use Keep App - for example, when you: Register to use Keep App;"
    ), # Source: Keep.txt
    EntityExample(
        name="TickTick",
        label="COMPANY_SERVICE",
        example="This Privacy Policy explains how information is collected, used and disclosed by TickTick with respect to users access and use of our service..."
    ), # Source: TickTick_ To Do List with Reminder, Day Planner.txt
    EntityExample(
        name="Viber",
        label="COMPANY_SERVICE",
        example="Please make sure you read and agree with our Terms of Use if you want to use Viber."
    ), # Source: Viber Messenger.txt
    EntityExample(
        name="PeopleFun",
        label="COMPANY_SERVICE",
        example="PeopleFun is committed to protecting your privacy."
    ), # Source: Wordscapes.txt (PeopleFun is the company behind Wordscapes)

    # --- DATA_CATEGORY ---
    EntityExample(
        name="IP address",
        label="DATA_CATEGORY",
        example="We collect information that you provide us or voluntarily share... such as IP address, browser information and cookies..."
    ), # Source: Fiverr.txt
    EntityExample(
        name="Genetic Information",
        label="DATA_CATEGORY", # Could also be SENSITIVE_DATA_CATEGORY if you want both
        example="23andMe collects and stores the following types of Personal Information: Genetic Information: information regarding your genotypes..."
    ), # Source: 23andMe.txt
    EntityExample(
        name="payment information",
        label="DATA_CATEGORY",
        example="When you purchase our Services or create a 23andMe account... we collect... payment information (eg, credit card)..."
    ), # Source: 23andMe.txt
    EntityExample(
        name="location data", # lowercase, matching the mention in the sentence
        label="DATA_CATEGORY",
        example="Information we collect in the course of you using Keep - such as certain location data and log data"
    ), # Source: Keep.txt
    EntityExample(
        name="your devices configuration", # Example of a more technical data category
        label="DATA_CATEGORY",
        example="Non-Personal Information: It includes but is not limited to your devices configuration, the package ID and version of the application that you use."
    ), # Source: TickTick_ To Do List with Reminder, Day Planner.txt
    EntityExample(
        name="your mobile devices address book",
        label="DATA_CATEGORY",
        example="When you install the Viber App, youll also be asked to allow us access to your mobile devices address book."
    ), # Source: Viber Messenger.txt
    EntityExample(
        name="usage statistics about your interactions with the Games",
        label="DATA_CATEGORY",
        example="When you access our Services, we may collect... usage statistics about your interactions with the Games."
    ), # Source: Wordscapes.txt

    # --- SENSITIVE_DATA_CATEGORY ---
    EntityExample(
        name="Genetic Information", # Note: "Genetic Information" is also a DATA_CATEGORY
        label="SENSITIVE_DATA_CATEGORY",
        example="Sensitive Information: information about your health, Genetic Information, and certain Self-Reported Information such as racial and ethnic origin..."
    ), # Source: 23andMe.txt
    EntityExample(
        name="health", # as in health information
        label="SENSITIVE_DATA_CATEGORY",
        example="Sensitive Information: information about your health, Genetic Information..."
    ), # Source: 23andMe.txt

    # --- PURPOSE_OF_PROCESSING ---
    EntityExample(
        name="Site operation",
        label="PURPOSE_OF_PROCESSING",
        example="Technical information that is gathered by our systems... automatically may be used for Site operation, optimization, analytics..."
    ), # Source: Fiverr.txt
    EntityExample(
        name="improve our Services",
        label="PURPOSE_OF_PROCESSING",
        example="We use the information described above in Section 2 to operate, provide, analyze and improve our Services."
    ), # Source: 23andMe.txt
    EntityExample(
        name="calculate your consumption of calorie",
        label="PURPOSE_OF_PROCESSING",
        example="We need your age, height and weight to calculate your consumption of calorie when you complete each training class."
    ), # Source: Keep.txt
    EntityExample(
        name="personalize your experience",
        label="PURPOSE_OF_PROCESSING",
        example="personalize your experience by providing content (such as games) on the Service, including targeted advertising..."
    ), # Source: Viber Messenger.txt
    EntityExample(
        name="prevent fraud",
        label="PURPOSE_OF_PROCESSING",
        example="we may retain personal information from closed accounts to comply with applicable laws, prevent fraud..."
    ), # Source: Fiverr.txt
    EntityExample(
        name="analyze usage and trends",
        label="PURPOSE_OF_PROCESSING",
        example="to analyze usage and trends with anonymous user data, and to improve the quality of our service..."
    ), # Source: TickTick_ To Do List with Reminder, Day Planner.txt
    EntityExample(
        name="deliver targeted advertising",
        label="PURPOSE_OF_PROCESSING",
        example="certain technical information... to develop and deliver targeted advertising in the Games..."
    ), # Source: Wordscapes.txt


    # --- THIRD_PARTY_TYPE ---
    EntityExample(
        name="third party service providers",
        label="THIRD_PARTY_TYPE",
        example="Your personal information may be stored in systems based around the world, and may be processed by third party service providers acting on our behalf."
    ), # Source: Fiverr.txt
    EntityExample(
        name="our contracted genotyping laboratory", # More specific type
        label="THIRD_PARTY_TYPE",
        example="...submit your saliva sample to our contracted genotyping laboratory, which processes and analyzes your sample..."
    ), # Source: 23andMe.txt
    EntityExample(
        name="Business Partners",
        label="THIRD_PARTY_TYPE",
        example="...provide information to enroll or participate in Other Programs provided on behalf of, or together with, Business Partners;"
    ), # Source: Groupon.txt
    EntityExample(
        name="Apple HealthKit", # Specific service
        label="THIRD_PARTY_TYPE",
        example="For example, we may send and receive data to and from Apple HealthKit to collaborate training consumption calculation..."
    ), # Source: Keep.txt
    EntityExample(
        name="Rakuten Inc.", # Parent company can be seen as a related third party
        label="THIRD_PARTY_TYPE",
        example="We may share the information we collect about you with the Viber corporate family, including our parent company, Rakuten Inc..."
    ), # Source: Viber Messenger.txt
    EntityExample(
        name="Google", # As in Google Analytics or Ad providers
        label="THIRD_PARTY_TYPE",
        example="Here is a link to the Privacy Policies for all our third party service providers: ...Google - https://policies.google.com/privacy"
    ), # Source: Wordscapes.txt (from list of providers)


    # --- USER_RIGHT ---
    EntityExample(
        name="delete your TickTick account", # Action implies right
        label="USER_RIGHT",
        example="You can voluntarily delete your TickTick account at any time on the Web and have all your data erased from our server."
    ), # Source: TickTick_ To Do List with Reminder, Day Planner.txt
    EntityExample(
        name="request erasure of Personal Information",
        label="USER_RIGHT",
        example="You can request erasure of Personal Information that: (a) is no longer necessary..."
    ), # Source: 23andMe.txt
    EntityExample(
        name="opt-out", # General opt-out
        label="USER_RIGHT",
        example="You can opt-out of being targeted by certain Third-Party Ad-Servers... using the National Advertising Initiatives Opt-Out Tool..."
    ), # Source: Groupon.txt
    EntityExample(
        name="export a copy of content",
        label="USER_RIGHT",
        example="You can export a copy of content in your Keep Account if you want to back it up..."
    ), # Source: Keep.txt

    # --- SECURITY_MEASURE ---
    EntityExample(
        name="encryption",
        label="SECURITY_MEASURE",
        example="We use a variety of security measures, including encryption and authentication tools, to help protect your information."
    ), # Source: Keep.txt
    EntityExample(
        name="Secure Socket Layer (SSL) technology",
        label="SECURITY_MEASURE",
        example="All credit card information you supply is transmitted via Secure Socket Layer (SSL) technology and then encrypted within our databases."
    ), # Source: Keep.txt
    EntityExample(
        name="De-identification",
        label="SECURITY_MEASURE",
        example="De-identification/Pseudonymization. Registration Information is stripped from Sensitive Information..."
    ), # Source: 23andMe.txt

    # --- TRACKING_TECHNOLOGY ---
    EntityExample(
        name="Cookies",
        label="TRACKING_TECHNOLOGY",
        example="We use Cookies and Device Data that allow us to connect your Site activity with other information we store about you..."
    ), # Source: Groupon.txt
    EntityExample(
        name="web beacons",
        label="TRACKING_TECHNOLOGY",
        example="Web-Behavior Information: information on how you use 23andMe Services collected through log files, cookies, web beacons, and similar technologies..."
    ), # Source: 23andMe.txt
    EntityExample(
        name="Google Analytics",
        label="TRACKING_TECHNOLOGY",
        example="In particular, we use Google Analytics cookies to obtain an overall view of user habits and volumes..."
    ), # Source: Fiverr.txt

    # --- GEOGRAPHIC_AREA ---
    EntityExample(
        name="United States",
        label="GEOGRAPHIC_AREA",
        example="Your information collected through the Service may be stored and processed in the United States or any other country..."
    ), # Source: 23andMe.txt
    EntityExample(
        name="European Economic Area (EEA)",
        label="GEOGRAPHIC_AREA",
        example="For individuals located in the European Economic Area (EEA), United Kingdom, or Switzerland..."
    ), # Source: 23andMe.txt
    EntityExample(
        name="California",
        label="GEOGRAPHIC_AREA",
        example="If you are a California resident under the age of 18... California Business and Professions Code Section 22581 permits you..."
    ), # Source: Groupon.txt

    # --- LEGAL_BASIS ---
    EntityExample(
        name="your consent",
        label="LEGAL_BASIS",
        example="Our legal basis for processing your Sensitive Information for the purpose described above is based on your consent."
    ), # Source: 23andMe.txt
    EntityExample(
        name="perform a contract",
        label="LEGAL_BASIS",
        example="In the majority of cases, processing will be justified on the basis that... our use of your personal information is necessary to perform a contract..."
    ), # Source: Fiverr.txt
    EntityExample(
        name="legitimate interests",
        label="LEGAL_BASIS",
        example="Other marketing activities will happen based on the legitimate interests of 23andMe."
    ), # Source: 23andMe.txt

    # --- SPECIFIC_REGULATION ---
    EntityExample(
        name="GDPR",
        label="SPECIFIC_REGULATION",
        example="...your rights to your data under US and International Law, including but not limited to EU Directive 2002/58/EC and the EU General Data Protection Regulation (GDPR)..."
    ), # Source: Wordscapes.txt
    EntityExample(
        name="Privacy Shield Frameworks",
        label="SPECIFIC_REGULATION",
        example="23andMe participates in and has certified its compliance with both the EU-U.S. and Swiss-U.S. Privacy Shield Frameworks..."
    ), # Source: 23andMe.txt
    EntityExample(
        name="COPPA",
        label="SPECIFIC_REGULATION",
        example="...and the US Childrens Online Privacy Protection Act (COPPA)."
    ) # Source: Wordscapes.txt
]

In [7]:
# Example relations for the graph operation.
# Each RelationExample names a `source` mention, a `target` mention, the relation
# `label` linking them, and a sentence (`example`) taken from the policy file named
# in the trailing "Source" comment. Section headers describe the entity types involved.
# Sentences whose words were fused by text extraction have had their spaces restored.
relation_examples = [
    # --- COMPANY_SERVICE --- COLLECTS --- DATA_CATEGORY ---
    RelationExample(
        source="Fiverr", # Instance of COMPANY_SERVICE
        target="IP address", # Instance of DATA_CATEGORY
        label="COLLECTS_DATA",
        example="We collect information that you provide us or voluntarily share with other users, and also some general technical information that is automatically gathered by our systems, such as IP address, browser information and cookies..."
    ), # Source: Fiverr.txt
    RelationExample(
        source="23andMe", # Instance of COMPANY_SERVICE
        target="Genetic Information", # Instance of DATA_CATEGORY / SENSITIVE_DATA_CATEGORY
        label="COLLECTS_DATA",
        example="23andMe collects and stores the following types of Personal Information: Genetic Information: information regarding your genotypes..."
    ), # Source: 23andMe.txt
    RelationExample(
        source="Keep App", # Instance of COMPANY_SERVICE
        target="location data", # Instance of DATA_CATEGORY
        label="COLLECTS_DATA",
        example="Information we collect in the course of you using Keep - such as certain location data and log data."
    ), # Source: Keep.txt
    RelationExample(
        source="TickTick", # Instance of COMPANY_SERVICE
        target="your name", # Instance of DATA_CATEGORY (specifically, part of Personal Information)
        label="COLLECTS_DATA",
        example="When registering for TickTick, we collect personal information such as your name."
    ), # Source: TickTick_ To Do List with Reminder, Day Planner.txt
    RelationExample(
        source="Viber", # Instance of COMPANY_SERVICE
        target="phone numbers and names of all your contacts", # Instance of DATA_CATEGORY
        label="COLLECTS_DATA",
        example="A copy of the phone numbers and names of all your contacts... will be collected and stored on our servers..."
    ), # Source: Viber Messenger.txt


    # --- COMPANY_SERVICE --- USES_DATA_FOR_PURPOSE --- PURPOSE_OF_PROCESSING ---
    # (Where DATA_CATEGORY is an implicit intermediary or attribute of the relation)
    RelationExample(
        source="Fiverr", # Instance of COMPANY_SERVICE
        target="Site operation", # Instance of PURPOSE_OF_PROCESSING
        label="USES_TECHNICAL_INFO_FOR", # Relation specifies the data type implicitly
        example="Technical information that is gathered by our systems... automatically may be used for Site operation, optimization, analytics..."
    ), # Source: Fiverr.txt
    RelationExample(
        source="23andMe", # Instance of COMPANY_SERVICE
        target="improve our Services", # Instance of PURPOSE_OF_PROCESSING
        label="USES_INFORMATION_FOR",
        example="We use the information described above in Section 2 to operate, provide, analyze and improve our Services."
    ), # Source: 23andMe.txt
    RelationExample(
        source="Keep App", # Instance of COMPANY_SERVICE
        target="calculate your consumption of calorie", # Instance of PURPOSE_OF_PROCESSING
        label="USES_USER_DATA_FOR",
        example="We need your age, height and weight to calculate your consumption of calorie when you complete each training class."
    ), # Source: Keep.txt
    RelationExample(
        source="Viber", # Instance of COMPANY_SERVICE
        target="personalize your experience", # Instance of PURPOSE_OF_PROCESSING
        label="USES_CONTENT_FOR",
        example="personalize your experience by providing content (such as games) on the Service, including targeted advertising..."
    ), # Source: Viber Messenger.txt
    RelationExample(
        source="PeopleFun", # Instance of COMPANY_SERVICE
        target="deliver and target behavioral or personalized ads", # Instance of PURPOSE_OF_PROCESSING
        label="USES_PERSONAL_INFO_WITH_CONSENT_FOR", # More specific relation
        example="With your consent, we may use your personal information to deliver and target behavioral or personalized ads..."
    ), # Source: Wordscapes.txt


    # --- COMPANY_SERVICE --- SHARES_DATA_WITH --- THIRD_PARTY_TYPE ---
    RelationExample(
        source="Fiverr", # Instance of COMPANY_SERVICE
        target="service providers", # Instance of THIRD_PARTY_TYPE
        label="SHARES_PERSONAL_DETAILS_WITH",
        example="We may provide your personal details to third parties, only in order to operate the Site... to service providers..."
    ), # Source: Fiverr.txt
    RelationExample(
        source="23andMe", # Instance of COMPANY_SERVICE
        target="our contracted genotyping laboratory", # Instance of THIRD_PARTY_TYPE
        label="SUBMITS_SAMPLE_TO", # A more specific sharing action
        example="...you must... ship your saliva sample to our contracted genotyping laboratory, which processes and analyzes your sample..."
    ), # Source: 23andMe.txt
    # NOTE(review): this sentence talks about communicating *about* Business Partners rather than
    # sharing data *with* them — consider a quote that states the sharing more directly.
    RelationExample(
        source="Groupon", # Instance of COMPANY_SERVICE
        target="Business Partners", # Instance of THIRD_PARTY_TYPE
        label="SHARES_INFORMATION_WITH",
        example="Communicate and provide additional information that may be of interest to you about Groupon and our Business Partners, sometimes by combining your information..."
    ), # Source: Groupon.txt
    RelationExample(
        source="Keep App", # Instance of COMPANY_SERVICE
        target="Apple HealthKit", # Instance of THIRD_PARTY_TYPE (specific service)
        label="INTEGRATES_DATA_WITH",
        example="For example, we may send and receive data to and from Apple HealthKit to collaborate training consumption calculation..."
    ), # Source: Keep.txt
    RelationExample(
        source="Viber", # Instance of COMPANY_SERVICE
        target="Rakuten Inc.", # Instance of THIRD_PARTY_TYPE (parent company)
        label="SHARES_INFO_WITH_CORPORATE_FAMILY",
        example="We may share the information we collect about you with the Viber corporate family, including our parent company, Rakuten Inc..."
    ), # Source: Viber Messenger.txt


    # --- USER --- HAS_RIGHT --- USER_RIGHT --- (User is often implicit)
    RelationExample(
        source="You", # Implicit User
        target="delete your TickTick account", # Represents Right to Erasure
        label="CAN_EXERCISE_RIGHT",
        example="You can voluntarily delete your TickTick account at any time on the Web and have all your data erased from our server."
    ), # Source: TickTick_ To Do List with Reminder, Day Planner.txt
    RelationExample(
        source="You", # Implicit User
        target="request erasure of Personal Information", # Instance of USER_RIGHT
        label="CAN_EXERCISE_RIGHT",
        example="You can request erasure of Personal Information that: (a) is no longer necessary..."
    ), # Source: 23andMe.txt

    # --- COMPANY_SERVICE --- EMPLOYS_SECURITY --- SECURITY_MEASURE ---
    RelationExample(
        source="Keep App", # Instance of COMPANY_SERVICE
        target="encryption", # Instance of SECURITY_MEASURE
        label="EMPLOYS_SECURITY_MEASURE",
        example="We use a variety of security measures, including encryption and authentication tools, to help protect your information."
    ), # Source: Keep.txt
    RelationExample(
        source="23andMe", # Instance of COMPANY_SERVICE
        target="De-identification", # Instance of SECURITY_MEASURE
        label="IMPLEMENTS_SECURITY_MEASURE", # Slightly different verb
        example="De-identification/Pseudonymization. Registration Information is stripped from Sensitive Information..."
    ), # Source: 23andMe.txt

    # --- COMPANY_SERVICE --- USES_TECHNOLOGY --- TRACKING_TECHNOLOGY ---
    RelationExample(
        source="Groupon", # Instance of COMPANY_SERVICE
        target="Cookies", # Instance of TRACKING_TECHNOLOGY
        label="USES_TRACKING_TECHNOLOGY",
        example="We use Cookies and Device Data that allow us to connect your Site activity with other information we store about you..."
    ), # Source: Groupon.txt
    RelationExample(
        source="Fiverr", # Instance of COMPANY_SERVICE
        target="Google Analytics cookies", # Instance of TRACKING_TECHNOLOGY
        label="USES_TRACKING_TECHNOLOGY",
        example="In particular, we use Google Analytics cookies to obtain an overall view of user habits and volumes..."
    ), # Source: Fiverr.txt

    # --- PROCESSING_ACTIVITY --- BASED_ON_LEGAL_BASIS --- LEGAL_BASIS ---
    # (COMPANY_SERVICE can be the actor for PROCESSING_ACTIVITY)
    RelationExample(
        source="processing your Sensitive Information", # Represents a processing activity
        target="your consent", # Instance of LEGAL_BASIS
        label="PROCESSING_BASED_ON",
        example="Our legal basis for processing your Sensitive Information for the purpose described above is based on your consent."
    ), # Source: 23andMe.txt
    RelationExample(
        source="our use of your personal information", # Represents a processing activity
        target="perform a contract", # Instance of LEGAL_BASIS
        label="PROCESSING_JUSTIFIED_BY",
        example="...our use of your personal information is necessary to perform a contract or take steps to enter into a contract with you..."
    ), # Source: Fiverr.txt

    # --- COMPANY_SERVICE --- COMPLIES_WITH --- SPECIFIC_REGULATION ---
    RelationExample(
        source="PeopleFun", # Instance of COMPANY_SERVICE
        target="GDPR", # Instance of SPECIFIC_REGULATION
        label="COMPLIES_WITH_REGULATION",
        example="This privacy policy explains our collection, use... as well as your rights to your data under US and International Law, including... the EU General Data Protection Regulation (GDPR)..."
    ), # Source: Wordscapes.txt
    RelationExample(
        source="23andMe", # Instance of COMPANY_SERVICE
        target="Privacy Shield Frameworks", # Instance of SPECIFIC_REGULATION
        label="COMPLIES_WITH_FRAMEWORK",
        example="23andMe participates in and has certified its compliance with both the EU-U.S. and Swiss-U.S. Privacy Shield Frameworks..."
    ) # Source: 23andMe.txt
]

In [8]:
# Bundle the entity schema and both example lists into a single graph operation.
graph_op = GraphOperation(
    # Identifier for this set of extraction instructions
    ident="privacy-policy-graph-agent-v1",
    # Entity types (label + description)
    entity_defs=entity_defs,
    # Sample entity mentions
    entity_examples=entity_examples,
    # Sample relations between mentions
    relation_examples=relation_examples,
)

In [9]:
# Task parameters: the graph operation plus what it applies to, which resources
# it is filtered to, and which LLM runs it.
graph_operation_parameters = DataAugmentation(
    name="PrivacyPolicyGraphGenerator",  # Descriptive name for this agent task
    on=ApplyTo.FIELD,  # Apply to resource fields (e.g., extracted text)
    operations=[Operation(graph=graph_op)],  # The graph operation built from the lists above
    filter=Filter(contains=[], resource_type=[]),  # Empty lists: no filter values supplied
    llm=LLMConfig(
        model="chatgpt4o-mini",  # NOTE(review): confirm against the models currently offered
        provider="openai",
    ),
)

# Start the task. Only the API call sits inside `try`; on failure the error is
# printed and then re-raised, so the notebook stops here instead of reaching
# later cells with `task_id` undefined.
try:
    response = kb.task.start(
        task_name=TaskName.LLM_GRAPH,
        # Original author's note: applies to all existing and future resources — confirm.
        apply=ApplyOptions.ALL,
        parameters=graph_operation_parameters,
    )
except Exception as e:
    print(f"Error starting graph generation task: {e}")
    raise
else:
    print("Graph generation task started successfully:")
    print(response)
    task_id = response.id  # Saved so the task status can be checked in a later cell

Graph generation task started successfully:
name=<TaskName.LLM_GRAPH: 'llm-graph'> status=<JobStatus.STARTED: 'started'> id='4e2298f5-3c9d-4e21-aec7-7675040cb19d'


In [15]:
# Read the `running` attribute of the task listing and display it as the cell output.
running_tasks = kb.task.list().running
running_tasks

[]

In [12]:
# Fetch the task identified by `task_id` (saved when the task was started) and display it.
# NOTE(review): this rebinds `response`, which earlier held the result of `kb.task.start(...)`.
response = kb.task.get(task_id=task_id)
response

PublicTaskSet(request=PublicTaskRequest(task=PublicTask(name='llm-graph', data_augmentation=True, description='Generate a Knowledge Graph by extracting relationships and entities over resources with an LLM'), source=<TrainingTaskDatasetSource.NUCLIADB: 'nucliadb'>, kbid='3aa88834-0641-4367-8994-985a86f01e55', dataset_id=None, account_id='37922852-1488-4882-ac9b-c9a02669e60c', nua_client_id=None, user_id='55e723b4-8cba-4c25-8020-ceba32c9e957', id='4e2298f5-3c9d-4e21-aec7-7675040cb19d', timestamp=datetime.datetime(2025, 5, 12, 10, 51, 16, 480120), scheduled=True, completed=False, stopped=False, scheduled_at=datetime.datetime(2025, 5, 12, 10, 51, 16, 784402), completed_at=None, stopped_at=None, failed=False, retries=0, parameters=DataAugmentation(name='PrivacyPolicyGraphGenerator', on=<ApplyTo.FIELD: 1>, filter=Filter(contains=[], resource_type=[], field_types=[], not_field_types=['a'], rids=[], fields=[], splits=[], labels=[], apply_to_agent_generated_fields=False), operations=[Operation